// TLM_POWER3 Energy-based for loosely-timed TLM.
// (C) 2011 DJ Greaves & MM Yasin, University of Cambridge Computer Laboratory.
// $Id: $

// pw_tlm_payload.cpp - bus distance energy accounting.
// This part of TLM_POWER3 is included if the user has configured TLM_PATH to the TLM2.0 library.


//
// We may wish to use a modified generic payload provided by this file.
// This enables distance and bit-level energy consumption to be modelled.
// If not desired, please set PW_TLM_PAYLOAD to 0.
//

#ifndef PW_TLM_PAYLOAD
// This must be set to at least one to compile this library, but other values
// can be used without loss of compatibility when compiling user code.
#define PW_TLM_PAYLOAD 1
#endif


#include "pw_tlm_payload.h"
#include "pw_energy.h"
#include "pw_length.h"
#include "pw_module_base.h"
#include "pw_debug.h"

// Debug switch for the transition trackers in this file.  When true, data and
// address transitions are always measured bit-by-bit instead of consulting the
// confidence switcher, and the measured counts are not recorded with it.
bool g_kill_conf = false; // TODO delete me

namespace sc_pwr
{

  // It is important not to model both the charge and discharge of a given net, otherwise energy costs will be double counted.
  // We adopt the approach of only counting the zero to one transition of a net as energy consuming.
  // When false, the trackers tally only bits that rise (new & ~old); when true, they tally every toggled bit (new ^ old).
  const bool both_edges_count = false;

// Tally of number of bits set in a byte. E.g. the last entry is 0xFF which has 8 ones.
// Indexed by the byte value (0..255); used by the transition trackers below to count
// changed bits one byte lane at a time.
  int pw_tallytable256[256] = 
    {
      0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
    };
  
  // Mark 'where' as the module this payload starts from and, when a bus tracker is
  // supplied, count the bit transitions the payload causes on that bus.
  //   where        - originating module, remembered as the previous hop.
  //   flags        - accounting flags; zero selects PW_TGP_DEFAULT_FLAGS.
  //   starting_bus - optional transition tracker for the first bus; may be null.
  void pw_tlm_generic_payload::pw_set_origin(sc_module *where, unsigned int flags, sc_pwr::tlm_bit_transition_tracker  *starting_bus)
  {
    // Compile-time kill switches, also tested by the functions that follow in this file.
    // TEMPKILL1 disables the payload hop/energy logging, TEMPKILL2 the transition counting.
#define TEMPKILL1 0
#define TEMPKILL2 0
    if (TEMPKILL1) return;

    if (flags == 0)
      m_flags = PW_TGP_DEFAULT_FLAGS;
    else
      m_flags = flags;

    //printf("Set origin to %p, trk->src=%p\n", where, starting_bus->src);
    m_whence = where;

    // Without a tracker there is nothing to count yet.
    if (!starting_bus) return;
    compute_xitions(starting_bus);
  }
  
  // Called when the payload reaches its final module: logs the last hop to 'where'
  // and then checkpoints the wiring energy.
  void pw_tlm_generic_payload::pw_terminus(sc_module *where)
  {
    // Skipping the work when no transitions are pending avoids a multiply and
    // adds a good speed up.
    if (!TEMPKILL1 && m_xitions != 0)
      {
        pw_log_hop(where);
        pw_energy_checkpoint(where);
      }
  }



  // Convert the transitions and distance accumulated so far into wiring energy and
  // charge it to a power-annotated module.
  //
  // The preferred account holder is 'where' when PW_TGP_ACCT_DEST is set (or when there
  // is no source tracker), otherwise the module that owns the source tracker.  If the
  // preferred module carries no pw_module_base, the module at the opposite end of the
  // hop is tried instead.  If neither is annotated nothing is recorded and the
  // accumulators are left untouched.
  void pw_tlm_generic_payload::pw_energy_checkpoint(sc_module *where)
  {
    // Credit energy to either source or dest, according to want.
    const bool l_prefer_dest = (m_flags & PW_TGP_ACCT_DEST) || !m_src_transtracker;
    sc_module *it0 = l_prefer_dest ? where : m_src_transtracker->src;
    //printf("checkpointer %p and %p -> %p\n", m_src_transtracker, where, it0);
    pw_module_base *it = get_pw_module_base(*it0);
    if (!it && !m_src_transtracker)
      {
        pw_info << "loghop: wiring energy no mod: transaction visited un-annotated sc_module " << where->name() << ":" << where->kind() << pw_endl;
        // No tracker means there is no other end to fall back on.
      }
    else if (!it && m_src_transtracker)
      {
        pw_debug << "loghop: wiring energy lost !: but have alternative " 
                 << where->name() << ":" << m_src_transtracker->src->name() << pw_endl;
        // Try the end that was NOT tried above.  Previously the source was always
        // retried here, so a failed source lookup never fell back to the destination.
        sc_module *l_other_end = l_prefer_dest ? m_src_transtracker->src : where;
        it = get_pw_module_base(*l_other_end);
        if (!it)
          pw_debug << "other end did not either!" << pw_endl;
      }

    if (it)
      {
        // 0.5 CV^2   at 0.3pF/mm.  (The voltage term is not explicit below - presumably 1V is assumed; TODO confirm.)
        //pw_energy wirecost = pw_energy(; // default energy per millimeter  TODO read from power file. TODO onchip/offchip etc.

        double length = m_distance_accumulator.to_meters() * 1000.0; // distance in millimetres  TODO move divide out of inner loop
        pw_energy l_energy = pw_energy(m_xitions * length * 0.3 * 0.5, PW_pJ); // TODO this multiply is not repeated under DMI?
        m_wiring_energy_accumulator +=  l_energy;
        m_dmi_energy_accumulator +=  l_energy;
        pw_info << " wiring energy d=" << m_distance_accumulator.round3sf() << " mm=" << length << ", xi=" << m_xitions << " e=" << l_energy << "\n";

        // The distance and transitions have now been billed; start afresh for the next segment.
        m_distance_accumulator = PW_ZERO_LENGTH;
        m_xitions = 0;

        it->record_energy_use(m_wiring_energy_accumulator, PW_ACCT_WIRING_DEFAULT, false);
        m_wiring_energy_accumulator = PW_ZERO_ENERGY;
      }
    else
      {
        pw_info << "No power module base: Wiring energy lost!" << pw_endl;
      }
  }
  
  // Count the bit transitions caused by driving 'bytes' bytes of 'data' over a data bus
  // that is 'buswidth' bits wide, updating the remembered bus state as a side effect.
  //
  //   data     - payload bytes; a null pointer yields a measured count of zero.
  //   bytes    - number of valid bytes at 'data'.
  //   buswidth - bus width in bits (rounded up to whole byte lanes).
  //
  // Returns the measured count when the confidence switcher asks for a measurement (or
  // g_kill_conf is set), otherwise the switcher's estimate.
  //
  // Every byte of the payload is counted: bytes beyond the last whole bus word (including
  // the case where the payload is narrower than the bus) are compared against the
  // lowest-numbered lanes.  NOTE(review): the real lanes depend on address alignment,
  // which is not modelled here.
  int sc_pwr::tlm_bit_transition_tracker::data_transitions(unsigned char *data, int bytes, int buswidth)
  {
    if (TEMPKILL2) return 0;

    xitions_t c = 0;
    if (g_kill_conf ||  xition_switchers[PW_XITION_SWITCHER_DATA]->measurement_needed())
      {
        const int bytewidth = (buswidth+7)/8;
        // A non-positive width has no lanes to compare against, so count nothing.
        if (data && bytes > 0 && bytewidth > 0)
          {
            // The lane history buffer is fixed size; check once rather than per word.
            assert(bytewidth <= MAX_TRACKED_DATA_WIDTH_BYTES);
            int lane = 0;
            for (int dp = 0; dp < bytes; dp++)
              {
                const unsigned char q0 = m_old_databus[lane];
                const unsigned char q1 = data[dp];
                m_old_databus[lane] = q1;
                c += pw_tallytable256[(both_edges_count ? (q1 ^ q0) : (q1 & ~q0)) & 0xFF];
                if (++lane == bytewidth) lane = 0; // next bus word starts again at lane 0
              }
          }
        //printf("Data active transitions bytes=%i; bytewidth=%i; bits toggled=%i \n", bytes, bytewidth, c);
        if (!g_kill_conf) xition_switchers[PW_XITION_SWITCHER_DATA]->record_measurement(c);
      }
    else
      {
        c = xition_switchers[PW_XITION_SWITCHER_DATA]->provide_estimate();
      }
    return c;
  }


  // Adopt 'new_bus' as the payload's current transition tracker and add the address
  // and/or data transitions it reports (as selected by m_flags) to m_xitions.
  //
  // A null tracker is rejected by the assert.  If asserts are compiled out, fixed
  // default counts are used instead: DEFAULT_ADDRESS_XITIONS_64 for the address and
  // DEFAULT_DATA_XITIONS_64 per (rounded-up) 64-bit word of data.
  void pw_tlm_generic_payload::compute_xitions(tlm_bit_transition_tracker *new_bus)
  {
    const int DEFAULT_ADDRESS_XITIONS_64 = 15;
    const int DEFAULT_DATA_XITIONS_64 = 32;

    assert(new_bus); // for now ... need to support other modes.
    m_src_transtracker = new_bus;

    if (m_flags & PW_TGP_ADDRESS)
      {
        int va = (m_src_transtracker) ? m_src_transtracker->address_transitions(get_address()): DEFAULT_ADDRESS_XITIONS_64;
        m_xitions += va;
      }
    if (m_flags & PW_TGP_DATA)
      {
        // The word count is parenthesised so the default is applied once per 64-bit
        // word; without the parentheses the multiply was done before the divide.
        int vd = (m_src_transtracker) ? m_src_transtracker->data_transitions(get_data_ptr(), get_data_length(), 64): DEFAULT_DATA_XITIONS_64 * ((get_data_length()+7)/8);
        m_xitions += vd;
      }

    // TODO ... lanes

    // TODO mpxd address/data busses ... address bits sit on top of data write bits.
  }


  // Count the bit transitions caused by driving 'new_address' onto the address bus and
  // remember it as the new bus state.  An unchanged address costs nothing and returns
  // zero.  Otherwise the result is either measured (and, unless g_kill_conf is set,
  // recorded with the confidence switcher) or taken from the switcher's estimate.
  int sc_pwr::tlm_bit_transition_tracker::address_transitions(sc_dt::uint64 new_address)
  {
    if (TEMPKILL2) return 0;
    if (m_old_address == new_address) return 0;

    xitions_t tally = 0;
    if (g_kill_conf || xition_switchers[PW_XITION_SWITCHER_ADDR]->measurement_needed())
      {
        // Bits of interest: every toggled bit, or only the rising ones.
        sc_dt::uint64 pending;
        if (both_edges_count)
          pending = new_address ^ m_old_address;
        else
          pending = new_address & ~m_old_address;

        // Consume the 64-bit mask one byte lane at a time via the tally table.
        int lanes_left = 64/8;
        while (lanes_left-- > 0)
          {
            tally += pw_tallytable256[pending & 0xFF];
            pending >>= 8;
          }

        //printf("Address active transitions %i  in %llx \n", tally, new_address);
        if (!g_kill_conf) xition_switchers[PW_XITION_SWITCHER_ADDR]->record_measurement(tally);
      }
    else
      {
        tally = xition_switchers[PW_XITION_SWITCHER_ADDR]->provide_estimate();
      }

    m_old_address = new_address;
    return tally;
  }
  

  //
  // Some care must be taken with counting transitions.  Eg. the address bus runs in only one direction, but this field is not likely to be
  // changed in the response direction ... except bus bridges and VM units, so flags note which busses are active at which point.
  //
  // Separate read and write data busses are commonly used within a SoC, so separate transition_trackers should be used. We generalised
  // this with a new tracker at every hop.
  //
  // Transitions only occur as the value is launched on the bus, but the energy used is proportional to the sum of all hop lengths,
  // so we should count the transitions once and store this count to be combined with distance as the payload is forwarded onwards.
  //
  // The data pointer will not be null in the request part of a read - it addresses the buffer where the data should be placed.
  //
  pw_agent_record pw_tlm_generic_payload::pw_log_hop(sc_module *where, unsigned int new_flags, tlm_bit_transition_tracker *new_bus)
  {
    // The agent record is returned on every path, including the early exits below.
    pw_agent_record hop_record(where, this, new_bus);
    if (TEMPKILL1) return hop_record;

    assert(where);
    pw_info << "log_hop " << where->name() << ":" << where->kind() << "\n";

    // No previous module recorded: there is no hop to measure, just note where we are.
    if (!m_whence)
      {
        m_whence = where;
        return hop_record;
      }

    // Same module as last time: nothing moved, and nothing else is updated.
    if (m_whence == where)
      {
        pw_debug << "loghop : no movement: src and dest are both " 
                 << where->name() << pw_endl;
        return hop_record;
      }

    // Length of this hop; stays zero when 'where' carries no power annotation.
    pw_length hop_length = PW_ZERO_LENGTH;
    pw_module_base *here = get_pw_module_base(*where);
    if (here)
      {
        pw_module_base::pw_parent_cache_entry *to_previous = here->lowest_common_parent(m_whence);
        hop_length = to_previous -> distance_to_peer;
      }
    else
      {
        pw_info << "loghop: transaction visited un-annotated sc_module " 
                << where->name() << pw_endl;
      }
    m_distance_accumulator += hop_length;

    // If we are entering a new bus then need to ckpt (any) energy so far and compute new xitions.
    // Sticky flags must be updated in between.
    const bool entering_bus = (new_bus != 0);
    if (entering_bus || (new_flags & PW_TGP_ACCT_CKP)) pw_energy_checkpoint(where);

    // Update new sticky flags: a group is replaced only if new_flags sets a bit in it.
#define UPDATE_FLAGS(STICKY_MASK) if (new_flags & STICKY_MASK) m_flags = (new_flags & STICKY_MASK) | (m_flags & ~ STICKY_MASK)
    UPDATE_FLAGS(PW_TGP_FIELD_STICKY_MASK);
    UPDATE_FLAGS(PW_TGP_STYLE_STICKY_MASK);
    UPDATE_FLAGS(PW_TGP_ACCT_STICKY_MASK);

    // We have xitions for a new bus, but do not yet know how far along it we travel.
    if (entering_bus) compute_xitions(new_bus);

    m_whence = where;
    return hop_record;
  }


  // Hand out a payload: reuse the head of the free list when one is available,
  // otherwise create a fresh payload that has this object as its constructor argument.
  PW_TLM_PAYTYPE * pw_tlm_paytype_mm_t::allocate()
  {
    if (!freelist)
      return new (PW_TLM_PAYTYPE )(this);

    // Pop the first pooled payload.
    PW_TLM_PAYTYPE *recycled = freelist;
    freelist = recycled->mm_next;
    return recycled;
  }


  // Return a payload to the pool by pushing it onto the head of the free list, from
  // where allocate() will hand it out again.
  //   disposeme - payload being released; must not be null.
  void pw_tlm_paytype_mm_t::free(PW_TLM_PAYTYPE *disposeme)
  {
    // Check the argument rather than requiring an empty pool: the free list is a
    // linked list and may legitimately hold several payloads at once.
    assert(disposeme);
    disposeme->mm_next = freelist;
    freelist = disposeme;
  }


    // Constructor
  tlm_bit_transition_tracker::tlm_bit_transition_tracker(sc_module *src):
    src(src)
  {
    // Reset on elaboration : when nets have no transition history, so the remembered
    // address and every tracked data lane start at zero.
    m_old_address = 0;
    int lane = MAX_TRACKED_DATA_WIDTH_BYTES;
    while (lane-- > 0) m_old_databus[lane] = 0;

    // One confidence switcher each for the address bus and the data bus.
    int confidence_level = 1000;
    xition_switchers[PW_XITION_SWITCHER_ADDR] = new confidence_switcher<xitions_t>(0, "transition tracker", "addr", confidence_level);
    xition_switchers[PW_XITION_SWITCHER_DATA] = new confidence_switcher<xitions_t>(0, "transition tracker", "data", confidence_level);
  }


  // Returns a copy of the payload's DMI energy accumulator, i.e. the energy added so
  // far by pw_energy_checkpoint() and pw_agent_record::record_energy_use().
  pw_energy pw_tlm_generic_payload::get_dmi_energy_record()
  {
    return this->m_dmi_energy_accumulator;
  }

  void pw_agent_record::record_energy_use(const pw_energy &e, int account, bool noscale)
  {
    assert(module);
    sc_pwr::pw_module_base *l_mb = get_pw_module_base(*module); // TODO move to constructor?
    assert(l_mb);
    l_mb -> record_energy_use(e, account, noscale);
    //tlm::tlm_command cmd = this->get_command();
    // if (cmd == tlm::TLM_READ_COMMAND)
    
    // NOTE THE account IS CURRENTLY IGNORED FOR DMI TODO.
    // log it in component (dmi) power record

    assert(noscale); // for now     // TODO if not noscale need to scale it

    payload -> m_dmi_energy_accumulator += e;
  }


} // end namespace

// eof
