/*
 * sim-profile.c - sample functional simulator implementation w/ profiling
 *
 * This file is a part of the SimpleScalar tool suite written by
 * Todd M. Austin as a part of the Multiscalar Research Project.
 *  
 * The tool suite is currently maintained by Doug Burger and Todd M. Austin.
 * 
 * Copyright (C) 1994, 1995, 1996, 1997, 1998 by Todd M. Austin
 *
 * This source file is distributed "as is" in the hope that it will be
 * useful.  The tool set comes with no warranty, and no author or
 * distributor accepts any responsibility for the consequences of its
 * use. 
 * 
 * Everyone is granted permission to copy, modify and redistribute
 * this tool set under the following conditions:
 * 
 *    This source code is distributed for non-commercial use only. 
 *    Please contact the maintainer for restrictions applying to 
 *    commercial use.
 *
 *    Permission is granted to anyone to make or distribute copies
 *    of this source code, either as received or modified, in any
 *    medium, provided that all copyright notices, permission and
 *    nonwarranty notices are preserved, and that the distributor
 *    grants the recipient permission for further redistribution as
 *    permitted by this document.
 *
 *    Permission is granted to distribute this file in compiled
 *    or executable form under the same conditions that apply for
 *    source code, provided that either:
 *
 *    A. it is accompanied by the corresponding machine-readable
 *       source code,
 *    B. it is accompanied by a written offer, with no time limit,
 *       to give anyone a machine-readable copy of the corresponding
 *       source code in return for reimbursement of the cost of
 *       distribution.  This written offer must permit verbatim
 *       duplication by anyone, or
 *    C. it is distributed by someone who received only the
 *       executable form, and is accompanied by a copy of the
 *       written offer of source code that they received concurrently.
 *
 * In other words, you are welcome to use, share and improve this
 * source file.  You are forbidden to forbid anyone else to use, share
 * and improve what you give them.
 *
 * INTERNET: dburger@cs.wisc.edu
 * US Mail:  1210 W. Dayton Street, Madison, WI 53706
 *
 * $Id: sim-profile.c,v 1.1.1.1 2000/11/29 14:53:54 cu-cs Exp $
 *
 * $Log: sim-profile.c,v $
 * 2003/10/5 rramaswa
 * Modified to PacketBench specifications
 * Lots of new flags added
 *
 * Revision 1.1.1.1  2000/11/29 14:53:54  cu-cs
 * Grand unification of arm sources.
 *
 *
 * Revision 1.1.1.1.2.1  2000/11/14 05:32:22  taustin
 * Sim-profile now works for ARM (mostly).
 *
 * Revision 1.1.1.1  2000/05/26 15:18:59  taustin
 * SimpleScalar Tool Set
 *
 *
 * Revision 1.4  1999/12/31 18:53:04  taustin
 * quad_t naming conflicts removed
 *
 * Revision 1.3  1999/12/13 18:46:51  taustin
 * cross endian execution support added
 *
 * Revision 1.2  1998/08/27 16:37:03  taustin
 * implemented host interface description in host.h
 * added target interface support
 * added support for register and memory contexts
 * instruction predecoding moved to loader module
 * Alpha target support added
 * added support for qword's
 * added fault support
 * added option ("-max:inst") to limit number of instructions analyzed
 * added target-dependent myprintf() support
 *
 * Revision 1.1  1997/03/11  01:33:36  taustin
 * Initial revision
 *
 *
 */

#include <stdio.h>
#include <stdlib.h>
#include <math.h>

#include "host.h"
#include "misc.h"
#include "machine.h"
#include "regs.h"
#include "memory.h"
#include "loader.h"
#include "syscall.h"
#include "dlite.h"
#include "symbol.h"
#include "options.h"
#include "stats.h"
#include "sim.h"

/*
 * This file implements a functional simulator with profiling support.  Run
 * with the `-h' flag to see profiling options available.
 */

/* simulated registers */
static struct regs_t regs;

/* simulated memory */
static struct mem_t *mem = NULL;

/* track number of refs */
static counter_t sim_num_refs = 0;

/* maximum number of inst's to execute */
static unsigned int max_insts;

/* profiling options */
static int prof_all /* = FALSE */;
static int prof_ic /* = FALSE */;
static int prof_inst /* = FALSE */;
static int prof_bc /* = FALSE */;
static int prof_am /* = FALSE */;
static int prof_seg /* = FALSE */;
static int prof_tsyms /* = FALSE */;
static int prof_dsyms /* = FALSE */;
static int load_locals /* = FALSE */;
static int prof_taddr /* = FALSE */;

/* text-based stat profiles */
#define MAX_PCSTAT_VARS 8
static int pcstat_nelt = 0;
static char *pcstat_vars[MAX_PCSTAT_VARS];

//PacketBench (instructions per function call)
static counter_t sim_insn_func = 0;
//PacketBench (number of function calls, i.e. no. of packets)
static unsigned int func_calls = 0;
//PacketBench (total number of accesses to packet memory)
static counter_t mem_acc = 0;
//PacketBench (start address of function)
static unsigned int start_addr;
//PacketBench (end address of function)
static unsigned int end_addr;
//PacketBench (data memory size calculation flag)
static unsigned int dmem = 0;
//PacketBench (instr. memory size calculation flag)
static unsigned int imem = 0;
//PacketBench (memory access patterns for packet x (non-zero!))
static unsigned int mem_pattern = 0;
//PacketBench (instr access patterns for packet x (non-zero!))
static unsigned int instr_pattern = 0;
//PacketBench (instructions executed per packet)
static counter_t insn_per_packet = 0;
//PacketBench (packet memory accesses per packet)
static counter_t mem_acc_per_packet = 0;
//PacketBench (total number of accesses to non-packet memory)
static counter_t non_mem_acc = 0;
//PacketBench (non-packet memory accesses per packet)
static counter_t non_mem_acc_per_packet = 0;
//PacketBench (start address of packet memory)
static unsigned int mem_start = 0;
//PacketBench (end address of packet memory)
static unsigned int mem_end = 0;
//PacketBench (enable verbose printing of instructions)
static unsigned int pb_verbose = 0;
// PacketBench (instruction class profile for specific function)
static struct stat_stat_t *ic_prof_func = NULL;
// PacketBench (count instructions per packet)
static struct stat_stat_t  *per_packet_insn = NULL;
// PacketBench (count packet memory accesses per packet)
static struct stat_stat_t  *per_packet_mem_acc = NULL;
// PacketBench (count non-packet memory accesses per packet)
static struct stat_stat_t  *per_packet_non_mem_acc = NULL;

/* register simulator-specific options */
void sim_reg_options(struct opt_odb_t *odb)
{
  opt_reg_header(odb, 
"sim-profile: This simulator implements a functional simulator with\n"
"profiling support.  Run with the `-h' flag to see profiling options\n"
"available.\n"
		 );

  /* instruction limit */
  opt_reg_uint(odb, "-max:inst", "maximum number of inst's to execute",
	       &max_insts, /* default */0,
	       /* print */TRUE, /* format */NULL);

  //PacketBench
  opt_reg_uint(odb, "-start_addr", "start address of function (PacketBench)",
	       &start_addr, /* default */0,
	       /* print */TRUE, /* format */NULL);

  //PacketBench
  opt_reg_uint(odb, "-end_addr", "end address of function (PacketBench)",
	       &end_addr, /* default */0,
	       /* print */TRUE, /* format */NULL);

  //PacketBench
  opt_reg_uint(odb, "-imem","instr. memory size calculation (PacketBench)",
               &imem, 1,TRUE,NULL);
 
  //PacketBench
  opt_reg_uint(odb, "-dmem","data memory size calculation (PacketBench)",
	       &dmem, 1,TRUE,NULL);

  //PacketBench
  opt_reg_uint(odb, "-mem_pattern","memory access pattern analysis for packet x (PacketBench)",
	       &mem_pattern, 1,TRUE,NULL);

  //PacketBench
  opt_reg_uint(odb, "-instr_pattern"," instruction pattern analysis for packet x (PacketBench)",
	       &instr_pattern, 1,TRUE,NULL);

  //PacketBench
  opt_reg_uint(odb, "-mem_start", "start address of packet memory (PacketBench)",
	       &mem_start, /* default */0,
	       /* print */TRUE, /* format */NULL);

  //PacketBench
  opt_reg_uint(odb, "-mem_end", "end address of packet memory (PacketBench)",
	       &mem_end, /* default */0,
	       /* print */TRUE, /* format */NULL);

  //PacketBench
  opt_reg_uint(odb, "-pb_verbose", "verbose display of PacketBench data (PacketBench)",
	       &pb_verbose, /* default */0,
	       /* print */TRUE, /* format */NULL);

  //PacketBench
  opt_reg_uint(odb, "-func_calls",
		   "number of function calls (PacketBench)",
		   &func_calls,100,TRUE,NULL);

  opt_reg_flag(odb, "-all", "enable all profile options",
	       &prof_all, /* default */FALSE, /* print */TRUE, NULL);

  opt_reg_flag(odb, "-iclass", "enable instruction class profiling",
	       &prof_ic, /* default */FALSE, /* print */TRUE, NULL);

  opt_reg_flag(odb, "-iprof", "enable instruction profiling",
	       &prof_inst, /* default */FALSE, /* print */TRUE, NULL);

  opt_reg_flag(odb, "-brprof", "enable branch instruction profiling",
	       &prof_bc, /* default */FALSE, /* print */TRUE, NULL);

  opt_reg_flag(odb, "-amprof", "enable address mode profiling",
	       &prof_am, /* default */FALSE, /* print */TRUE, NULL);

  opt_reg_flag(odb, "-segprof", "enable load/store address segment profiling",
	       &prof_seg, /* default */FALSE, /* print */TRUE, NULL);

  opt_reg_flag(odb, "-tsymprof", "enable text symbol profiling",
	       &prof_tsyms, /* default */FALSE, /* print */TRUE, NULL);

  opt_reg_flag(odb, "-taddrprof", "enable text address profiling",
	       &prof_taddr, /* default */FALSE, /* print */TRUE, NULL);

  opt_reg_flag(odb, "-dsymprof", "enable data symbol profiling",
	       &prof_dsyms, /* default */FALSE, /* print */TRUE, NULL);

  opt_reg_flag(odb, "-internal",
	       "include compiler-internal symbols during symbol profiling",
	       &load_locals, /* default */FALSE, /* print */TRUE, NULL);

  opt_reg_string_list(odb, "-pcstat",
		      "profile stat(s) against text addr's (mult uses ok)",
		      pcstat_vars, MAX_PCSTAT_VARS, &pcstat_nelt, NULL,
		      /* !print */FALSE, /* format */NULL, /* accrue */TRUE);
}

/* check simulator-specific option values */
void
sim_check_options(struct opt_odb_t *odb, int argc, char **argv)
{
  if (prof_all)
    {
      /* enable all options */
      prof_ic = TRUE;
      prof_inst = TRUE;
      prof_bc = TRUE;
      prof_am = TRUE;
      prof_seg = TRUE;
      prof_tsyms = TRUE;
      prof_dsyms = TRUE;
      prof_taddr = TRUE;
    }
}

/* instruction classes */
enum inst_class_t {
  ic_load,		/* load inst */
  ic_store,		/* store inst */
  ic_uncond,		/* uncond branch */
  ic_cond,		/* cond branch */
  ic_icomp,		/* all other integer computation */
  ic_fcomp,		/* all floating point computation */
  ic_trap,		/* system call */
  ic_NUM
};

/* instruction class strings */
static char *inst_class_str[ic_NUM] = {
  "load",		/* load inst */
  "store",		/* store inst */
  "uncond branch",	/* uncond branch */
  "cond branch",	/* cond branch */
  "int computation",	/* all other integer computation */
  "fp computation",	/* all floating point computation */
  "trap"		/* system call */
};

/* instruction class profile */
static struct stat_stat_t *ic_prof = NULL;

/* instruction description strings */
static char *inst_str[OP_MAX];

/* instruction profile */
static struct stat_stat_t *inst_prof = NULL;

/* branch class profile */
enum branch_class_t {
  bc_uncond_dir,	/* direct unconditional branch */
  bc_cond_dir,		/* direct conditional branch */
  bc_call_dir,		/* direct functional call */
  bc_uncond_indir,	/* indirect unconditional branch */
  bc_cond_indir,	/* indirect conditional branch */
  bc_call_indir,	/* indirect function call */
  bc_NUM
};

/* branch class description strings */
static char *branch_class_str[bc_NUM] = {
  "uncond direct",	/* direct unconditional branch */
  "cond direct",	/* direct conditional branch */
  "call direct",	/* direct functional call */
  "uncond indirect",	/* indirect unconditional branch */
  "cond indirect",	/* indirect conditional branch */
  "call indirect"	/* indirect function call */
};

/* branch profile */
static struct stat_stat_t *bc_prof = NULL;

/* addressing mode profile */
static struct stat_stat_t *am_prof = NULL;

/* address segments */
enum addr_seg_t {
  seg_data,		/* data segment */
  seg_heap,		/* heap segment */
  seg_stack,		/* stack segment */
  seg_text,		/* text segment */
  seg_NUM
};

/* address segment strings */
static char *addr_seg_str[seg_NUM] = {
  "data segment",	/* data segment */
  "heap segment",	/* heap segment */
  "stack segment",	/* stack segment */
  "text segment",	/* text segment */
};

/* address segment profile */
static struct stat_stat_t *seg_prof = NULL;

/* bind ADDR to the segment it references */
static enum addr_seg_t			/* segment referenced by ADDR */
bind_to_seg(md_addr_t addr)		/* address to bind to a segment */
{
  if (ld_data_base <= addr && addr < (ld_data_base + ld_data_size))
    return seg_data;
  else if ((ld_data_base + ld_data_size) <= addr && addr < ld_brk_point)
    return seg_heap;
  /* FIXME: ouch! */
  else if ((ld_stack_base - (16*1024*1024)) <= addr && addr < ld_stack_base)
    return seg_stack;
  else if (ld_text_base <= addr && addr < (ld_text_base + ld_text_size))
    return seg_text;
  else
    panic("cannot bind address to segment");
}

/* text symbol profile */
static struct stat_stat_t *tsym_prof = NULL;
static char **tsym_names = NULL;

/* data symbol profile */
static struct stat_stat_t *dsym_prof = NULL;
static char **dsym_names = NULL;

/* text address profile */
static struct stat_stat_t *taddr_prof = NULL;

/* text-based stat profiles */
static struct stat_stat_t *pcstat_stats[MAX_PCSTAT_VARS];
static counter_t pcstat_lastvals[MAX_PCSTAT_VARS];
static struct stat_stat_t *pcstat_sdists[MAX_PCSTAT_VARS];

/* wedge all stat values into a counter_t */
#define STATVAL(STAT)							\
  ((STAT)->sc == sc_int							\
   ? (counter_t)*((STAT)->variant.for_int.var)				\
   : ((STAT)->sc == sc_uint						\
      ? (counter_t)*((STAT)->variant.for_uint.var)			\
      : ((STAT)->sc == sc_counter					\
	 ? *((STAT)->variant.for_counter.var)				\
	 : (panic("bad stat class"), 0))))

/* register simulator-specific statistics */
void
sim_reg_stats(struct stat_sdb_t *sdb)
{
  int i;

  stat_reg_counter(sdb, "sim_num_insn",
		   "total number of instructions executed",
		   &sim_num_insn, sim_num_insn, NULL);
  stat_reg_counter(sdb, "sim_num_refs",
		   "total number of loads and stores executed",
		   &sim_num_refs, 0, NULL);
  stat_reg_int(sdb, "sim_elapsed_time",
	       "total simulation time in seconds",
	       &sim_elapsed_time, 0, NULL);
  stat_reg_formula(sdb, "sim_inst_rate",
		   "simulation speed (in insts/sec)",
		   "sim_num_insn / sim_elapsed_time", NULL);

  //PacketBench
  stat_reg_counter(sdb, "sim_insn_func",
		   "total number of instructions for function",
		   &sim_insn_func,0,NULL);

  //PacketBench
  stat_reg_counter(sdb,"insn_per_packet",
		   "instructions executed per packet",
		  &insn_per_packet,0,NULL);

  //PacketBench
  stat_reg_counter(sdb, "mem_access",
		   "total memory accesses to packet memory",
		   &mem_acc,0,NULL);

  //PacketBench
  stat_reg_counter(sdb,"mem_acc_per_packet",
		   "packet memory accesses per packet",
		  &mem_acc_per_packet,0,NULL);

  //PacketBench
  stat_reg_counter(sdb, "non_mem_access_per_packet",
		   "non-packet memory accesses per packet",
		   &non_mem_acc_per_packet,0,NULL);

  //PacketBench
  stat_reg_counter(sdb, "non_mem_access",
		   "total memory accesses to non-packet memory",
		   &non_mem_acc,0,NULL);

  //PacketBench
  ic_prof_func = stat_reg_dist(sdb, "sim_inst_class_prof_func",
			  "instruction class profile for function",
			  /* initial value */0,
			  /* array size */ic_NUM,
			  /* bucket size */1,
			  /* print format */(PF_COUNT|PF_PDF),
			  /* format */NULL,
			  /* index map */inst_class_str,
			  /* print fn */NULL);

  //PacketBench
  per_packet_insn = stat_reg_dist(sdb, "sim_inst_per_packet",
			"instructions per packet",
		      /* initial value */0,
		      /* array size */func_calls+1,
		      /* bucket size */1,
		      /* print format */PF_COUNT,
		      /* format */NULL,
		      /* index map */NULL,
		      /* print fn */NULL);

  //PacketBench
  per_packet_mem_acc = stat_reg_dist(sdb, "sim_mem_acc_per_packet",
			"packet memory accesses per packet",
		      /* initial value */0,
		      /* array size */func_calls+1,
		      /* bucket size */1,
		      /* print format */PF_COUNT,
		      /* format */NULL,
		      /* index map */NULL,
		      /* print fn */NULL);

  //PacketBench
  per_packet_non_mem_acc = stat_reg_dist(sdb, "sim_non_mem_acc_per_packet",
			"non-packet memory accesses per packet",
		      /* initial value */0,
		      /* array size */func_calls+1,
		      /* bucket size */1,
		      /* print format */PF_COUNT,
		      /* format */NULL,
		      /* index map */NULL,
		      /* print fn */NULL);

  if (prof_ic)
    {
      /* instruction class profile */
      ic_prof = stat_reg_dist(sdb, "sim_inst_class_prof",
			      "instruction class profile",
			      /* initial value */0,
			      /* array size */ic_NUM,
			      /* bucket size */1,
			      /* print format */(PF_COUNT|PF_PDF),
			      /* format */NULL,
			      /* index map */inst_class_str,
			      /* print fn */NULL);
    }

  if (prof_inst)
    {
      int i;
      char buf[512];

      /* conjure up appropriate instruction description strings */
      for (i=0; i < /* skip NA */OP_MAX-1; i++)
	{
	  sprintf(buf, "%-8s %-6s", md_op2name[i+1], md_op2format[i+1]);
	  inst_str[i] = mystrdup(buf);
	}
      
      /* instruction profile */
      inst_prof = stat_reg_dist(sdb, "sim_inst_prof",
				"instruction profile",
				/* initial value */0,
				/* array size */ /* skip NA */OP_MAX-1,
				/* bucket size */1,
				/* print format */(PF_COUNT|PF_PDF),
				/* format */NULL,
				/* index map */inst_str,
				/* print fn */NULL);
    }

  if (prof_bc)
    {
      /* instruction branch profile */
      bc_prof = stat_reg_dist(sdb, "sim_branch_prof",
			      "branch instruction profile",
			      /* initial value */0,
			      /* array size */bc_NUM,
			      /* bucket size */1,
			      /* print format */(PF_COUNT|PF_PDF),
			      /* format */NULL,
			      /* index map */branch_class_str,
			      /* print fn */NULL);
    }

  if (prof_am)
    {
      /* instruction branch profile */
      am_prof = stat_reg_dist(sdb, "sim_addr_mode_prof",
			      "addressing mode profile",
			      /* initial value */0,
			      /* array size */md_amode_NUM,
			      /* bucket size */1,
			      /* print format */(PF_COUNT|PF_PDF),
			      /* format */NULL,
			      /* index map */md_amode_str,
			      /* print fn */NULL);
    }

  if (prof_seg)
    {
      /* instruction branch profile */
      seg_prof = stat_reg_dist(sdb, "sim_addr_seg_prof",
			       "load/store address segment profile",
			       /* initial value */0,
			       /* array size */seg_NUM,
			       /* bucket size */1,
			       /* print format */(PF_COUNT|PF_PDF),
			       /* format */NULL,
			       /* index map */addr_seg_str,
			       /* print fn */NULL);
    }

  if (prof_tsyms && sym_ntextsyms != 0)
    {
      int i;

      /* load program symbols */
      sym_loadsyms(ld_prog_fname, load_locals);

      /* conjure up appropriate instruction description strings */
      tsym_names = (char **)calloc(sym_ntextsyms, sizeof(char *));

      for (i=0; i < sym_ntextsyms; i++)
	tsym_names[i] = sym_textsyms[i]->name;
      
      /* text symbol profile */
      tsym_prof = stat_reg_dist(sdb, "sim_text_sym_prof",
				"text symbol profile",
				/* initial value */0,
				/* array size */sym_ntextsyms,
				/* bucket size */1,
				/* print format */(PF_COUNT|PF_PDF),
				/* format */NULL,
				/* index map */tsym_names,
				/* print fn */NULL);
    }

  if (prof_dsyms && sym_ndatasyms != 0)
    {
      int i;

      /* load program symbols */
      sym_loadsyms(ld_prog_fname, load_locals);

      /* conjure up appropriate instruction description strings */
      dsym_names = (char **)calloc(sym_ndatasyms, sizeof(char *));

      for (i=0; i < sym_ndatasyms; i++)
	dsym_names[i] = sym_datasyms[i]->name;
      
      /* data symbol profile */
      dsym_prof = stat_reg_dist(sdb, "sim_data_sym_prof",
				"data symbol profile",
				/* initial value */0,
				/* array size */sym_ndatasyms,
				/* bucket size */1,
				/* print format */(PF_COUNT|PF_PDF),
				/* format */NULL,
				/* index map */dsym_names,
				/* print fn */NULL);
    }

  if (prof_taddr)
    {
      /* text address profile (sparse profile), NOTE: a dense print format
         is used, its more difficult to read, but the profiles are *much*
	 smaller, I've assumed that the profiles are read by programs, at
	 least for your sake I hope this is the case!! */
      taddr_prof = stat_reg_sdist(sdb, "sim_text_addr_prof",
				  "text address profile",
				  /* initial value */0,
				  /* print format */(PF_COUNT|PF_PDF),
				  /* format */"0x%p %u %.2f",
				  /* print fn */NULL);
    }

  for (i=0; i<pcstat_nelt; i++)
    {
      char buf[512], buf1[512];
      struct stat_stat_t *stat;

      /* track the named statistical variable by text address */

      /* find it... */
      stat = stat_find_stat(sdb, pcstat_vars[i]);
      if (!stat)
	fatal("cannot locate any statistic named `%s'", pcstat_vars[i]);

      /* stat must be an integral type */
      if (stat->sc != sc_int && stat->sc != sc_uint && stat->sc != sc_counter)
	fatal("`-pcstat' statistical variable `%s' is not an integral type",
	      stat->name);

      /* register this stat */
      pcstat_stats[i] = stat;
      pcstat_lastvals[i] = STATVAL(stat);

      /* declare the sparce text distribution */
      sprintf(buf, "%s_by_pc", stat->name);
      sprintf(buf1, "%s (by text address)", stat->desc);
      pcstat_sdists[i] = stat_reg_sdist(sdb, buf, buf1,
					/* initial value */0,
					/* print format */(PF_COUNT|PF_PDF),
					/* format */"0x%p %u %.2f",
					/* print fn */NULL);
    }
  ld_reg_stats(sdb);
  mem_reg_stats(mem, sdb);
}

/* initialize the simulator */
void
sim_init(void)
{
  sim_num_refs = 0;

  /* allocate and initialize register file */
  regs_init(&regs);

  /* allocate and initialize memory space */
  mem = mem_create("mem");
  mem_init(mem);
}

/* local machine state accessor */
static char *					/* err str, NULL for no err */
profile_mstate_obj(FILE *stream,		/* output stream */
		   char *cmd,			/* optional command string */
		   struct regs_t *regs,		/* registers to access */
		   struct mem_t *mem)		/* memory to access */
{
  /* just dump intermediate stats */
  sim_print_stats(stream);

  /* no error */
  return NULL;
}

/* load program into simulated state */
void
sim_load_prog(char *fname,		/* program to load */
	      int argc, char **argv,	/* program arguments */
	      char **envp)		/* program environment */
{
  /* load program text and data, set up environment, memory, and regs */
  ld_load_prog(fname, argc, argv, envp, &regs, mem, TRUE);

  /* initialize the DLite debugger */
  dlite_init(md_reg_obj, dlite_mem_obj, profile_mstate_obj);
}


/* print simulator-specific configuration information */
void
sim_aux_config(FILE *stream)		/* output stream */
{
  /* nothing currently */
}

/* dump simulator-specific auxiliary simulator statistics */
void
sim_aux_stats(FILE *stream)		/* output stream */
{
}

/* un-initialize simulator-specific state */
void
sim_uninit(void)
{
  /* nada */
}


/*
 * configure the execution engine
 */

/*
 * precise architected register accessors
 */

/* next program counter */
#define SET_NPC(EXPR)		(regs.regs_NPC = (EXPR))

/* current program counter */
#define CPC			(regs.regs_PC)

/* general purpose registers */
#define GPR(N)			(regs.regs_R[N])
#define SET_GPR(N,EXPR)                                                 \
  ((void)(((N) == 15) ? setPC++ : 0), regs.regs_R[N] = (EXPR))

#if defined(TARGET_PISA)

/* floating point registers, L->word, F->single-prec, D->double-prec */
#define FPR_L(N)		(regs.regs_F.l[(N)])
#define SET_FPR_L(N,EXPR)	(regs.regs_F.l[(N)] = (EXPR))
#define FPR_F(N)		(regs.regs_F.f[(N)])
#define SET_FPR_F(N,EXPR)	(regs.regs_F.f[(N)] = (EXPR))
#define FPR_D(N)		(regs.regs_F.d[(N) >> 1])
#define SET_FPR_D(N,EXPR)	(regs.regs_F.d[(N) >> 1] = (EXPR))

/* miscellaneous register accessors */
#define SET_HI(EXPR)		(regs.regs_C.hi = (EXPR))
#define HI			(regs.regs_C.hi)
#define SET_LO(EXPR)		(regs.regs_C.lo = (EXPR))
#define LO			(regs.regs_C.lo)
#define FCC			(regs.regs_C.fcc)
#define SET_FCC(EXPR)		(regs.regs_C.fcc = (EXPR))

#elif defined(TARGET_ALPHA)

/* floating point registers, L->word, F->single-prec, D->double-prec */
#define FPR_Q(N)		(regs.regs_F.q[N])
#define SET_FPR_Q(N,EXPR)	(regs.regs_F.q[N] = (EXPR))
#define FPR(N)			(regs.regs_F.d[N])
#define SET_FPR(N,EXPR)		(regs.regs_F.d[N] = (EXPR))

/* miscellaneous register accessors */
#define FPCR			(regs.regs_C.fpcr)
#define SET_FPCR(EXPR)		(regs.regs_C.fpcr = (EXPR))
#define UNIQ			(regs.regs_C.uniq)
#define SET_UNIQ(EXPR)		(regs.regs_C.uniq = (EXPR))

#elif defined(TARGET_ARM)

/* processor status register */
#define PSR                     (regs.regs_C.cpsr)
#define SET_PSR(EXPR)           (regs.regs_C.cpsr = (EXPR))

#define PSR_N                   _PSR_N(regs.regs_C.cpsr)
#define SET_PSR_N(EXPR)         _SET_PSR_N(regs.regs_C.cpsr, (EXPR))
#define PSR_C                   _PSR_C(regs.regs_C.cpsr)
#define SET_PSR_C(EXPR)         _SET_PSR_C(regs.regs_C.cpsr, (EXPR))
#define PSR_Z                   _PSR_Z(regs.regs_C.cpsr)
#define SET_PSR_Z(EXPR)         _SET_PSR_Z(regs.regs_C.cpsr, (EXPR))
#define PSR_V                   _PSR_V(regs.regs_C.cpsr)
#define SET_PSR_V(EXPR)         _SET_PSR_V(regs.regs_C.cpsr, (EXPR))

/* floating point conversions */
union x { float f; word_t w; };
#define DTOW(D)         ({ union x fw; fw.f = (float)(D); fw.w; })
#define WTOD(W)         ({ union x fw; fw.w = (W); (double)fw.f; })
#define QSWP(Q)                                                         \
  ((((Q) << 32) & ULL(0xffffffff00000000)) | (((Q) >> 32) & ULL(0xffffffff)))

/* floating point registers, L->word, F->single-prec, D->double-prec */
#define FPR_Q(N)                (QSWP(regs.regs_F.q[N]))
#define SET_FPR_Q(N,EXPR)       (regs.regs_F.q[N] = QSWP((EXPR)))
#define FPR_W(N)                (DTOW(regs.regs_F.d[N]))
#define SET_FPR_W(N,EXPR)       (regs.regs_F.d[N] = (WTOD(EXPR)))
#define FPR(N)                  (regs.regs_F.d[(N)])
#define SET_FPR(N,EXPR)         (regs.regs_F.d[(N)] = (EXPR))

/* miscellaneous register accessors */
#define FPSR                    (regs.regs_C.fpsr)
#define SET_FPSR(EXPR)          (regs.regs_C.fpsr = (EXPR))

#else
#error No ISA target defined...
#endif

/* precise architected memory state accessor macros */
#define READ_BYTE(SRC, FAULT)						\
  ((FAULT) = md_fault_none, MEM_READ_BYTE(mem, addr = (SRC)))
#define READ_HALF(SRC, FAULT)						\
  ((FAULT) = md_fault_none, MEM_READ_HALF(mem, addr = (SRC)))
#define READ_WORD(SRC, FAULT)						\
  ((FAULT) = md_fault_none, MEM_READ_WORD(mem, addr = (SRC)))
#ifdef HOST_HAS_QWORD
#define READ_QWORD(SRC, FAULT)						\
  ((FAULT) = md_fault_none, MEM_READ_QWORD(mem, addr = (SRC)))
#endif /* HOST_HAS_QWORD */

#define WRITE_BYTE(SRC, DST, FAULT)					\
  ((FAULT) = md_fault_none, MEM_WRITE_BYTE(mem, addr = (DST), (SRC)))
#define WRITE_HALF(SRC, DST, FAULT)					\
  ((FAULT) = md_fault_none, MEM_WRITE_HALF(mem, addr = (DST), (SRC)))
#define WRITE_WORD(SRC, DST, FAULT)					\
  ((FAULT) = md_fault_none, MEM_WRITE_WORD(mem, addr = (DST), (SRC)))
#ifdef HOST_HAS_QWORD
#define WRITE_QWORD(SRC, DST, FAULT)					\
  ((FAULT) = md_fault_none, MEM_WRITE_QWORD(mem, addr = (DST), (SRC)))
#endif /* HOST_HAS_QWORD */

/* system call handler macro */
#define SYSCALL(INST)	sys_syscall(&regs, mem_access, mem, INST, TRUE)


/* addressing mode FSM (dest of last LUI, used for decoding addr modes) */
static unsigned int fsm = 0;

//PacketBench
static int count_flag = FALSE;


/* start simulation, program loaded, processor precise state initialized */
void
sim_main(void)
{
  int i;
  md_inst_t inst;
  register md_addr_t addr;
  register int is_write;
  enum md_opcode op;
  unsigned int flags;
  enum md_fault_type fault;
  int setPC;

  //Start PacketBench
  int function_calls=0; // keep track of function execution
  FILE *fp,*fp1,*fp2,*fp3,*fp4;

  fprintf(stderr, "sim: ** starting functional simulation **\n");
  
  if (imem == 1)
    { 
      fp = fopen("imem.dat","w");
    }

  if (dmem == 1)
    {
      fp4 = fopen("dmem.dat","w");
    }

  if (mem_pattern != 0)
    {
      fp1 = fopen("mem_pattern.dat","w");
      myfprintf(fp1,"#Memory access patterns for packet %d",mem_pattern);
      myfprintf(fp1,"\n");
    }
  
  if (instr_pattern != 0)
    {
      fp2 = fopen("instr_pattern.dat","w");
      myfprintf(fp2,"#Instruction access patterns for packet %d",instr_pattern);
      myfprintf(fp2,"\n");
    }

  if (pb_verbose == 1)
    {
      fp3 = fopen("pb_intr.trace","w");
      myfprintf(fp3,"\n####################################################");
      myfprintf(fp3,"\n########## PacketBench Instruction Trace ###########");
      myfprintf(fp3,"\n##########           (%d packets)        ###########",func_calls+1);
      myfprintf(fp3,"\n####################################################");
      myfprintf(fp3,"\n\n");
    }
  //End PacketBench

  /* set up initial default next PC */
  regs.regs_NPC = regs.regs_PC + sizeof(md_inst_t);

  /* synchronize register files... */
  regs.regs_R[MD_REG_PC] = regs.regs_PC;

  /* check for DLite debugger entry condition */
  if (dlite_check_break(regs.regs_PC, /* no access */0, /* addr */0, 0, 0))
    dlite_main(regs.regs_PC - sizeof(md_inst_t), regs.regs_PC,
	       sim_num_insn, &regs, mem);

  while (TRUE)
    {
      /* maintain $r0 semantics */
#ifndef TARGET_ARM
      regs.regs_R[MD_REG_ZERO] = 0;
#endif
#ifdef TARGET_ALPHA
      regs.regs_F.d[MD_REG_ZERO] = 0.0;
#endif /* TARGET_ALPHA */

      /* get the next instruction to execute */
      MD_FETCH_INST(inst, mem, regs.regs_PC);


      //Start PacketBench
      if (regs.regs_PC == start_addr)
	{
	  // start counting
	  count_flag = TRUE;
	  // function has been executed once more
	  function_calls++;
	  if (pb_verbose == 1)
	    {
	      myfprintf(fp3,"\n");
	      myfprintf(fp3,"# Start - PacketBench Instruction Trace - Packet %d",function_calls-1);
	      myfprintf(fp3,"\n");
	    }
	}

      if (regs.regs_PC == end_addr)
	{
	  if (pb_verbose == 1)
	    {
	      myfprintf(fp3,"\n");
	      myfprintf(fp3,"# End - PacketBench Instruction Trace - Packet %d",function_calls-1);
	      myfprintf(fp3,"\n");
	    }

	  count_flag = FALSE;
	  // count instructions per packet
	  stat_add_samples(per_packet_insn,function_calls-1,insn_per_packet);
	  // count packet memory accesses per packet
	  stat_add_samples(per_packet_mem_acc,function_calls-1,mem_acc_per_packet);
	  // count non-packet memory accesses per packet
	  stat_add_samples(per_packet_non_mem_acc,function_calls-1,non_mem_acc_per_packet);
	  
	  // reset per packet counters
	  mem_acc_per_packet=0;
	  insn_per_packet=0;
	  non_mem_acc_per_packet=0;	 		  

	  // Do analysis "func_calls" times
	  if ((function_calls-1) == func_calls) 
	    return;
	}

      if (count_flag == TRUE)
	{
	  // increment total instructions executed
	  sim_insn_func++;
	  // one more instruction for *this* function call
	  insn_per_packet++;

	  // For computing instruction memory size
	  if (imem == 1)
	    {
	      myfprintf(fp,"0x%08x",regs.regs_PC);
	      myfprintf(fp,"\n");
	    }

	  // For computing instruction pattern
	  if ((instr_pattern != 0) && (instr_pattern == (function_calls-1)))
	    {
	      myfprintf(fp2,"%d\t",(insn_per_packet-1));
	      myfprintf(fp2,"%d",regs.regs_PC);
	      myfprintf(fp2,"\n");
	    }

	  if (pb_verbose == 1)
	    {
	      myfprintf(fp3,"\n%d\t",(insn_per_packet-1));
	      myfprintf(fp3,"%u : ",regs.regs_PC);
	      md_print_insn(inst, regs.regs_PC, fp3);
	    }
	}
      //End PacketBench

      if (verbose)
	{
	  myfprintf(stderr, "%10n @ 0x%08p: ", sim_num_insn, regs.regs_PC);
	  md_print_insn(inst, regs.regs_PC, stderr);
	  fprintf(stderr, "\n");
	  fflush(stderr);
	}

      /* keep an instruction count */
      sim_num_insn++;

      /* set default reference address and access mode */
      addr = 0; is_write = FALSE;

      /* set default fault - none */
      fault = md_fault_none;

      /* decode the instruction */
      MD_SET_OPCODE(op, inst);

      if (op == NA)
        fatal("bogus opcode detected @ 0x%08p", regs.regs_PC);

      setPC = 0;
      regs.regs_R[MD_REG_PC] = regs.regs_PC;

      /* execute the instruction */
      switch (op)
	{
#define DEFINST(OP,MSK,NAME,OPFORM,RES,FLAGS,O1,O2,O3,I1,I2,I3,I4)	\
	case OP:							\
          SYMCAT(OP,_IMPL);						\
          break;
#define DEFLINK(OP,MSK,NAME,MASK,SHIFT)					\
        case OP:							\
          panic("attempted to execute a linking opcode");
#define CONNECT(OP)
#define DECLARE_FAULT(FAULT)						\
	  { fault = (FAULT); break; }
#include "machine.def"
	default:
	  panic("attempted to execute a bogus opcode");
	}

      if (fault != md_fault_none)
        fatal("fault (%d) detected @ 0x%08p", fault, regs.regs_PC);

      if (setPC != 0/* regs.regs_R[MD_REG_PC] != regs.regs_PC */)
        regs.regs_NPC = regs.regs_R[MD_REG_PC];

      if (MD_OP_FLAGS(op) & F_MEM)
	{
	  // This is a memory instruction
	  sim_num_refs++;
	  if (MD_OP_FLAGS(op) & F_STORE)
	    is_write = TRUE;

	  // Start PacketBench
	  if (count_flag == TRUE)
	    {
	      if (pb_verbose == 1)
		{
		  myfprintf(fp3," : %u",addr);
		}

	      // For computing data memory size
	      if (dmem == 1)
		{
		  myfprintf(fp4,"0x%08x",addr);
		  myfprintf(fp4,"\n");
		}
	      
	      // Count accesses to packet memory
	      if ((addr >= mem_start) && (addr < mem_end))
	        {
		  mem_acc++;
		  mem_acc_per_packet++;

		  // Tag packet memory access (pattern)
		  if ((mem_pattern != 0) && (mem_pattern == (function_calls-1)))
		    {
		      // 1 --> write to packet memory
		      // 2 --> read from packet memory
		      if (MD_OP_FLAGS(op) & F_STORE)
			{
			  myfprintf(fp1,"%d\t1",(insn_per_packet-1));
			  myfprintf(fp1,"\n");
			}
		      else if (MD_OP_FLAGS(op) & F_LOAD)
			{
			  myfprintf(fp1,"%d\t2",(insn_per_packet-1));
			  myfprintf(fp1,"\n");
			}
		    }

		  if (pb_verbose == 1)
		    {
		      if (MD_OP_FLAGS(op) & F_STORE)
			{
			  myfprintf(fp3," : 1");
			}
		      if (MD_OP_FLAGS(op) & F_LOAD)
			{
			  myfprintf(fp3," : 2");
			}
		    }
	        }
	      else
		// Count accesses to non-packet memory
	        {
		  non_mem_acc++;
		  non_mem_acc_per_packet++;

		  // Tag non packet memory access (pattern)
		  if ((mem_pattern != 0) && (mem_pattern == (function_calls-1)))
		    {
		      // 3 --> write to non-packet memory
		      // 4 --> read from non-packet memory
		      if (MD_OP_FLAGS(op) & F_STORE)
			{
			  myfprintf(fp1,"%d\t3",(insn_per_packet-1));
			  myfprintf(fp1,"\n");
			}
		      else if (MD_OP_FLAGS(op) & F_LOAD)
			{
			  myfprintf(fp1,"%d\t4",(insn_per_packet-1));
			  myfprintf(fp1,"\n");
			}
		    }

		  if (pb_verbose == 1)
		    {
		      if (MD_OP_FLAGS(op) & F_STORE)
			{
			  myfprintf(fp3," : 3");
			}
		      if (MD_OP_FLAGS(op) & F_LOAD)
			{
			  myfprintf(fp3," : 4");
			}
		    }
		}
	    }	      
	}
      else
	{
	  // Tag other (non memory access) instructions
	  if (count_flag == TRUE) 
	    {
	      // 0 --> not a memory instruction
	      if ((mem_pattern != 0) && (mem_pattern == (function_calls-1)))
		{
		  myfprintf(fp1,"%d\t0",(insn_per_packet-1));
		  myfprintf(fp1,"\n");
		}

	      if (pb_verbose == 1)
		{
		  myfprintf(fp3," : 0x-------- : 0");
		}
	    }
	}
      // End PacketBench

      /*
       * profile this instruction
       */
      flags = MD_OP_FLAGS(op);

      //Start PacketBench - to profile within a particular function
      if (count_flag == TRUE)
	{
	  enum inst_class_t ic;

	  /* compute instruction class */
	  if (flags & F_LOAD)
	    ic = ic_load;
	  else if (flags & F_STORE)
	    ic = ic_store;
#if 0
	  else if (flags & F_UNCOND)
	    ic = ic_uncond;
	  else if (flags & F_COND)
	    ic = ic_cond;      
#endif
	  else if (flags & F_CTRL && COND == COND_AL)
	    ic = ic_uncond;
	  else if (flags & F_CTRL && COND != COND_AL)
	    ic = ic_cond;
	  else if (flags & F_ICOMP)
	    ic = ic_icomp;
	  else if (flags & F_FCOMP)
	    ic = ic_fcomp;
	  else if (flags & F_TRAP)
	    ic = ic_trap;
	  else
	    panic("instruction has no class");

	  /* update instruction class profile */
	  stat_add_sample(ic_prof_func, (int)ic);
	}
      //End PacketBench

      if (prof_ic)
	{
	  enum inst_class_t ic;

	  /* compute instruction class */
	  if (flags & F_LOAD)
	    ic = ic_load;
	  else if (flags & F_STORE)
	    ic = ic_store;
#if 0
	  else if (flags & F_UNCOND)
	    ic = ic_uncond;
	  else if (flags & F_COND)
	    ic = ic_cond;      
#endif
	  else if (flags & F_CTRL && COND == COND_AL)
	    ic = ic_uncond;
	  else if (flags & F_CTRL && COND != COND_AL)
	    ic = ic_cond;
	  else if (flags & F_ICOMP)
	    ic = ic_icomp;
	  else if (flags & F_FCOMP)
	    ic = ic_fcomp;
	  else if (flags & F_TRAP)
	    ic = ic_trap;
	  else
	    panic("instruction has no class");

	  /* update instruction class profile */
	  stat_add_sample(ic_prof, (int)ic);
	}

      if (prof_inst)
	{
	  /* update instruction profile */
	  stat_add_sample(inst_prof, (int)op - /* skip NA */1);
	}

#if FIXME
      if (prof_bc)
	{
	  enum branch_class_t bc;

	  /* compute instruction class */
	  if (flags & F_CTRL)
	    {
	      if ((flags & (F_CALL|F_DIRJMP)) == (F_CALL|F_DIRJMP))
		bc = bc_call_dir;
	      else if ((flags & (F_CALL|F_INDIRJMP)) == (F_CALL|F_INDIRJMP))
		bc = bc_call_indir;
	      else if ((flags & (F_UNCOND|F_DIRJMP)) == (F_UNCOND|F_DIRJMP))
		bc = bc_uncond_dir;
	      else if ((flags & (F_UNCOND|F_INDIRJMP))== (F_UNCOND|F_INDIRJMP))
		bc = bc_uncond_indir;
	      else if ((flags & (F_COND|F_DIRJMP)) == (F_COND|F_DIRJMP))
		bc = bc_cond_dir;
	      else if ((flags & (F_COND|F_INDIRJMP)) == (F_COND|F_INDIRJMP))
		bc = bc_cond_indir;
	      else
		panic("branch has no class");

	      /* update instruction class profile */
	      stat_add_sample(bc_prof, (int)bc);
	    }
	}
#endif

#if FIXME
      if (prof_am)
	{
	  enum md_amode_type am;

	  /* update addressing mode pre-probe FSM */
	  MD_AMODE_PREPROBE(op, fsm);

	  /* compute addressing mode */
	  if (flags & F_MEM)
	    {
	      /* compute addressing mode */
	      MD_AMODE_PROBE(am, op, fsm);

	      /* update the addressing mode profile */
	      stat_add_sample(am_prof, (int)am);

	      /* addressing mode pre-probe FSM, after all loads and stores */
	      MD_AMODE_POSTPROBE(fsm);
	    }
	}
#endif

#if FIXME
      if (prof_seg)
	{
	  if (flags & F_MEM)
	    {
	      /* update instruction profile */
	      stat_add_sample(seg_prof, (int)bind_to_seg(addr));
	    }
	}
#endif

      if (prof_tsyms)
	{
	  int tindex;

	  /* attempt to bind inst address to a text segment symbol */
	  sym_bind_addr(regs.regs_PC, &tindex, /* !exact */FALSE, sdb_text);

	  if (tindex >= 0)
	    {
	      if (tindex > sym_ntextsyms)
		panic("bogus text symbol index");

	      stat_add_sample(tsym_prof, tindex);
	    }
	  /* else, could not bind to a symbol */
	}

      if (prof_dsyms)
	{
	  int dindex;

	  if (flags & F_MEM)
	    {
	      /* attempt to bind inst address to a text segment symbol */
	      sym_bind_addr(addr, &dindex, /* !exact */FALSE, sdb_data);

	      if (dindex >= 0)
		{
		  if (dindex > sym_ndatasyms)
		    panic("bogus data symbol index");

		  stat_add_sample(dsym_prof, dindex);
		}
	      /* else, could not bind to a symbol */
	    }
	}

      if (prof_taddr)
	{
	  /* add regs_PC exec event to text address profile */
	  stat_add_sample(taddr_prof, regs.regs_PC);
	}

      /* update any stats tracked by PC */
      for (i=0; i<pcstat_nelt; i++)
	{
	  counter_t newval;
	  int delta;

	  /* check if any tracked stats changed */
	  newval = STATVAL(pcstat_stats[i]);
	  delta = newval - pcstat_lastvals[i];
	  if (delta != 0)
	    {
	      stat_add_samples(pcstat_sdists[i], regs.regs_PC, delta);
	      pcstat_lastvals[i] = newval;
	    }

	}

      /* check for DLite debugger entry condition */
      if (dlite_check_break(regs.regs_NPC,
			    is_write ? ACCESS_WRITE : ACCESS_READ,
			    addr, sim_num_insn, sim_num_insn))
	dlite_main(regs.regs_PC, regs.regs_NPC, sim_num_insn, &regs, mem);

      /* go to the next instruction */
      regs.regs_PC = regs.regs_NPC;
      regs.regs_NPC += sizeof(md_inst_t);

      /* finish early? */
      if (max_insts && sim_num_insn >= max_insts)
	return;
    }
}
