/******************************************************************************
*                                                                             *
*   Copyright 2005 University of Cambridge Computer Laboratory.               *
*                                                                             *
*   This file is part of Nprobe.                                              *
*                                                                             *
*   Nprobe is free software; you can redistribute it and/or modify            *
*   it under the terms of the GNU General Public License as published by      *
*   the Free Software Foundation; either version 2 of the License, or         *
*   (at your option) any later version.                                       *
*                                                                             *
*   Nprobe is distributed in the hope that it will be useful,                 *
*   but WITHOUT ANY WARRANTY; without even the implied warranty of            *
*   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the             *
*   GNU General Public License for more details.                              *
*                                                                             *
*   You should have received a copy of the GNU General Public License         *
*   along with Nprobe; if not, write to the Free Software                     *
*   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA *
*                                                                             *
******************************************************************************/


#include <sys/types.h>
#include <sys/socket.h>
#include <sys/time.h>
#include <sys/ioctl.h>
#include <netinet/in.h>
#include <net/if.h>
#include <assert.h>
#include <err.h>
#include <limits.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>

#include "skb.h"
#include <sk98_timers.h>
#include "probe.h"

/* This is the stuff necessary for run-time sk98 calibration. */

/* Notes on cards
 *
 * For the sk98, reading the counter is expensive (you can't simply
 * get a pointer to the free-running counter; it's byte-swapped and
 * dereferenced), so the sk98 getcurtime does sort of what init_tstamp
 * does, but it does work!
 *
 */



// Per-interface clock calibration state.  Relates the NIC's tick
// counter to wall-clock time: at tic_base tics the time was time_base.
struct __clock_retimer_st {

    char *name; /* Of the interface we're attached to e.g. "eth2" */
    unsigned dev_num; /* Written into every drift-file record */

    /* Calibration state, advanced by doTimer().  The first enumerator
       has value 0, so a zero-filled structure starts in prestart. */
    enum {
	clock_mode_prestart,        /* base not set yet; first doTimer() call sets it */
	clock_mode_find_intercept,  /* accumulating samples for the regression */
	clock_mode_constant_output  /* no further adjustment of time_base */
    } mode;

    // tick value that a packet's nictstamp must reach before doTimer()
    // takes its next sample
    unsigned long nextdrift;

    // base time; ie at tic_base tics, the time actually was time_base
    struct timeval time_base;
    unsigned long long tic_base;

    unsigned long long tic_cur; /* Where we are at the moment,
				   monotonically increasing */
    unsigned tic_last; /* Where we are at the moment, wrapping at the
			  same time as the actual nictstamp. */

    unsigned int binned;  // number of consecutive binned samples

    // stats to get time intercept.  "diff" is (estimated packet time -
    // system time) and "time" is (system time - time_base), both in
    // seconds, as accumulated by doTimer().
    unsigned int samples; // number of samples accumulated
    double sum;    // sum of diff
    double sum2;   // sum of diff*diff
    double tsum;   // sum of time
    double tsum2;  // sum of time*time
    double psum;   // sum of time*diff (product sum)
    double min;    // min(diff)
    double max;    // max(diff)

};



/* drift_period: base spacing, in NIC ticks, between calibration
   samples.  drift_interval() multiplies it by a random factor in
   [1, 9), which is where the min/max figures below come from.
   nictstamp_freq: NIC ticks per second used by getTime(); may be
   overridden by initialise_timestamps() and is re-estimated by
   doTimer(). */
#if defined(SK98)
unsigned long drift_period = 1500000; // min= 48ms, max=0.432s
static unsigned long nictstamp_freq = 31250000;
#elif defined(ACENIC)
unsigned long drift_period = 50000; // min= 50ms, max=0.45s
static unsigned long nictstamp_freq = 1000000;
#else
unsigned long drift_period = 50000; // min= 50ms, max=0.45s
static unsigned long nictstamp_freq = 1000000;
#endif

/* Non-zero selects run-time recalibration in doTimer(); set by
   initialise_timestamps(). */
static int do_clock_retime;
/* Optional binary log of calibration samples; NULL when not logging. */
static FILE *drift_file;

#if defined(SK98)

/*
 * Issue the driver-private ioctl SIOCDEVPRIVATE+0x4 on interface
 * `devname` and return the unsigned int the driver stores through
 * ifr_data.
 *
 * Returns 0 if the socket cannot be opened or the ioctl fails (a
 * message is printed); note that 0 is therefore indistinguishable from
 * a genuine reading of 0.
 */
static unsigned int
sk98_ioctl( const char * devname )
{
    unsigned int p = 0;
    unsigned int result;
    int ret, sock;
    struct ifreq ifr;

    /* Open a socket for our ioctls.  socket() reports failure with -1;
       0 is a perfectly valid descriptor. */
    sock = socket(AF_INET, SOCK_DGRAM, IPPROTO_IP);
    if(sock < 0)
    {
	printf("XFailed socket on %s\n", devname);
	return 0;
    }

    /* Set up basic ioctl structure.  The name is copied with a bound so
       an over-long devname cannot overrun ifr_name. */
    memset(&ifr, 0, sizeof(ifr));
    ifr.ifr_data = (void*) &p;
    snprintf(ifr.ifr_name, sizeof(ifr.ifr_name), "%s", devname);
    ret = ioctl(sock, SIOCDEVPRIVATE+0x4, &ifr);
    close(sock);

    if(ret)
    {
	printf("XFailed ioctl on %s\n", devname);
	perror("ioctl failed:");
	return 0;
    }

    /* Read back exactly sizeof(unsigned int) bytes from wherever
       ifr_data points; reading a pointer-sized value here would run
       past the end of p on 64-bit targets. */
    memcpy(&result, ifr.ifr_data, sizeof(result));
    //printf("Did ioctl on %s, got %u\n", devname, result);

    return result;
}


/* XXX SOS22: Is this necessary? */
/* Performs one sk98 timestamp ioctl on `name` and throws the value
   away. */
static void init_card_tstamps(const char *name)
{
    (void) sk98_ioctl(name);
}


/* Current sk98 counter value for interface `name`, as returned by
   sk98_ioctl() (0 when the ioctl fails). */
long card_tstamp_getcurtime( const char *name )
{
    const unsigned int ticks = sk98_ioctl(name);

    return ticks;
}

						   
#elif defined(ACENIC)

/*
 * Issue the driver-private ioctl SIOCDEVPRIVATE+0x2 on interface
 * `devname` and return the pointer the driver stores through ifr_data.
 *
 * Returns NULL if the socket cannot be opened or the ioctl fails (a
 * message is printed).  The socket is always closed before returning.
 */
static unsigned long *
acenic_ioctl( char * devname )
{
    unsigned long *p = NULL;
    int sock;
    struct ifreq ifr;

    /* Open a socket for our ioctls */
    sock = socket(AF_INET, SOCK_DGRAM, IPPROTO_IP);
    if (sock < 0)
    {
	printf("XFailed socket on %s\n", devname);
	return NULL;
    }

    /* Set up basic ioctl structure.  The name is copied with a bound so
       an over-long devname cannot overrun ifr_name. */
    memset(&ifr, 0, sizeof(ifr));
    ifr.ifr_data = (void*) &p;
    snprintf(ifr.ifr_name, sizeof(ifr.ifr_name), "%s", devname);
    if (ioctl(sock, SIOCDEVPRIVATE+0x2, &ifr))
    {
	printf("XFailed ioctl on %s\n", devname);
	perror("ioctl failed:");
	close(sock);
	return NULL;
    }
    close(sock);

    p = (unsigned long *) *((unsigned long**)ifr.ifr_data);


    printf("Did ioctl on %s, got %p\n", devname, (void *) p);

    return p;
}

/* Per-interface pointers obtained from acenic_ioctl(), indexed by
   interface number; room for at most 4 interfaces.
   NOTE(review): presumably each points at the card's live timestamp
   counter, hence the volatile qualifier -- confirm against the driver. */
typedef volatile unsigned long vul;
vul * ace_tstamp[4];
int init_card_tstamps(void)
{
    int rc,i;
    char buf[256];
    //char eth[][6]={"eth2","eth3","eth4","eth5"};

    for(i=0;i<eth_ifaces;i++)
    {	
	ace_tstamp[i] = acenic_ioctl(eth[i]);
	if( ace_tstamp[i] == NULL ) return -1;

	fprintf(stderr, "Acenic %d tstamp addr %p\n",
		i,ace_tstamp[i]);
	fprintf(stderr, "MAdeit!!\n");
    }

    return 0;
}


long card_tstamp_getcurtime( int iface )
{
    return * ace_tstamp[iface];
}

#else

/* Stub for builds with neither SK98 nor ACENIC defined: there is no
   card to query, so always returns a null pointer. */
static unsigned long *
acenic_ioctl( char * devname )
{
    (void) devname;
    return NULL;
}

/* Stub for builds with neither SK98 nor ACENIC defined: nothing to
   initialise, always reports success (0). */
int init_card_tstamps(void)
{
    const int status = 0;

    return status;
}


/* Stub for builds with neither SK98 nor ACENIC defined: there is no
   card counter, so the reading is always 0. */
long card_tstamp_getcurtime( int iface )
{
    (void) iface;
    return 0L;
}

						   
#endif


/*************************************************************************/


/* Append one calibration sample to the drift file, if one is open.
   The record is five native unsigned longs: NIC stamp, system seconds,
   system microseconds, packet count, device number.  The stream is
   flushed after every record.  Does nothing when no drift file is
   open. */
static void record_tstamp_drift_file(unsigned long nictstamp,
				     struct timeval *now,
				     unsigned total_pkts,
				     unsigned dev_num)
{
    unsigned long record[5];

    if (!drift_file)
	return;

    record[0] = nictstamp;
    record[1] = (unsigned long) now->tv_sec;
    record[2] = (unsigned long) now->tv_usec;
    record[3] = total_pkts;
    record[4] = dev_num;

    fwrite(record, sizeof(record[0]), sizeof(record) / sizeof(record[0]),
	   drift_file);
    fflush(drift_file);
}

/* Append an end-of-calibration record to the drift file, if one is
   open.  Same five-unsigned-long layout as a sample record, but the
   stamp and packet-count fields are 0 and the time fields carry the
   adjusted time base.  Does nothing when no drift file is open. */
static void record_endrec_drift_file(struct timeval *timebase,
				     unsigned dev_num)
{
    unsigned long record[5];

    if (!drift_file)
	return;

    record[0] = 0;
    record[1] = (unsigned long) timebase->tv_sec;
    record[2] = (unsigned long) timebase->tv_usec;
    record[3] = 0;
    record[4] = dev_num;

    fwrite(record, sizeof(record[0]), sizeof(record) / sizeof(record[0]),
	   drift_file);
    fflush(drift_file);
}

/* Is x >= y, allowing for overflow?
 *
 * When x is numerically >= y the answer is yes only if the gap is
 * below 2^30; when x is numerically < y the answer is yes only if the
 * gap exceeds 2^30 (x is then taken to have wrapped round past y).
 * Both arguments are evaluated more than once, so they must be free of
 * side effects.
 * NOTE(review): the 2^30 thresholds look tuned for a 32-bit counter,
 * but doTimer() passes unsigned long operands, which are 64 bits wide
 * on LP64 targets -- confirm the behaviour there.
 */
#define TSTAMP_GREATEREQ(x, y) \
	( (x) >= (y) ? ((x) - (y) < 1 << 30) : ((y) - (x) > 1 << 30) )

/* Pick the number of ticks to wait before the next drift sample: a
   random multiple, in [1, 9), of drift_period.  Randomising the
   spacing helps to avoid aliasing effects. */
static unsigned long drift_interval(void)
{
    const double multiplier = (drand48() * 8) + 1;

    return (unsigned long) (multiplier * drift_period);
}

/* Express a struct timeval as a single double: whole seconds plus the
   microsecond field scaled down to seconds. */
static double tv_to_secs(const struct timeval *tv)
{
    return (double) tv->tv_sec + ((double) tv->tv_usec / 1e6);
}

/* Convert a double giving the time in seconds to a struct timeval. */
/* We round to the nearest microsecond.

   The seconds are held in a time_t rather than an int, so times beyond
   January 2038 do not overflow, and a negative fractional part is
   borrowed from the seconds before rounding so that the result always
   satisfies 0 <= tv_usec < 1000000. */
static void secs_to_tv(struct timeval *tv, double t)
{
    time_t sec = (time_t) t;	/* truncates toward zero */
    double frac = t - (double) sec;
    long usec;

    if (frac < 0) {
	/* Negative t: move to the second below and a positive
	   fraction. */
	sec--;
	frac += 1.0;
    }

    usec = (long) (frac * 1000000 + 0.5);
    if (usec >= 1000000) {
	/* This can happen, due to rounding error. */
	sec++;
	usec = 0;
    }

    tv->tv_sec = sec;
    tv->tv_usec = usec;

    assert(tv->tv_usec >= 0 && tv->tv_usec < 1000000);
}

/* Restart the calibration run: discard all accumulated statistics and
   re-anchor the tick/time base at (nictstamp, *now), then schedule the
   next drift sample.  tic_last, binned and mode are left untouched. */
static void reset_timer(clock_retimer_t *timer, unsigned long nictstamp,
			struct timeval *now)
{
    /* Throw away the regression accumulators. */
    timer->samples = 0;
    timer->sum = timer->sum2 = 0;
    timer->tsum = timer->tsum2 = 0;
    timer->psum = 0;
    timer->min = timer->max = 0;

    /* New base point. */
    timer->time_base = *now;
    timer->tic_cur = nictstamp;
    timer->tic_base = timer->tic_cur;

    /* First sample of the new run. */
    timer->nextdrift = timer->tic_base + drift_interval();
}

/* Allocate and zero-initialise calibration state for the interface
   `name` (which is copied), tagging it with dev_num.  The zeroed
   structure starts in clock_mode_prestart.

   Never returns NULL: exits via err() if memory cannot be allocated.
   The returned structure and its name string are heap-allocated; no
   matching destructor is visible in this file.

   NOTE(review): init_card_tstamps(name) matches only the SK98 variant
   of that function; the other build variants declare it as taking
   void -- confirm those builds are still expected to compile. */
clock_retimer_t *new_clock_retimer(const char *name, unsigned dev_num)
{
    clock_retimer_t *work;

    init_card_tstamps(name);

    work = calloc(1, sizeof(*work));
    if (!work)
	err(1, "allocating timer recalibration structure");

    /* The name is printed in later diagnostics, so don't carry on
       with a NULL copy. */
    work->name = strdup(name);
    if (!work->name)
	err(1, "allocating timer name");

    work->dev_num = dev_num;
    return work;
}

/* Given that we *just* received a packet with tstamp nictstamp, update
   the clock calibration.  You really want to call this as soon after
   receiving the packet as possible.

   timer      - per-interface calibration state
   nictstamp  - NIC tick stamp of the packet
   total_pkts - packet count, only logged to the drift file

   The first call sets the time base.  After that a sample is taken
   only once nictstamp has reached timer->nextdrift; each sample is
   logged to the drift file and, while in find_intercept mode, fed into
   a linear regression.  Once 160 samples are in, the global
   nictstamp_freq is re-estimated; if it needed more than a tiny
   correction the run restarts, otherwise the time base is shifted by
   the fitted intercept and calibration is complete.

   Returns 1 if we just went from an uncalibrated to a calibrated
   state, 0 otherwise. */
int doTimer(clock_retimer_t *timer, unsigned long nictstamp,
	    unsigned total_pkts)
{
    struct timeval now; /* gettimeofday() time */
    struct timeval est_time; /* Estimated packet arrival time */
    double time; /* est_time minus now, seconds */
    double tdiff; /* Difference between now and timer->time_base,
		     seconds */
    double s_xx;
    double s_xy;
    double s_yy;
    double grad;

    double m_x;
    double m_y;
    double inter;

    double s2_r;
    double uncerc2;

    if (timer->mode == clock_mode_prestart) {
	gettimeofday(&now, NULL);
	reset_timer(timer, nictstamp, &now);
	if(do_clock_retime)
	    timer->mode = clock_mode_find_intercept;
	else
	    timer->mode = clock_mode_constant_output;
	return 0;
    }

    if (!TSTAMP_GREATEREQ(nictstamp, timer->nextdrift)) {
	/* It's too early to do the next phase. */
	return 0;
    }

    gettimeofday(&now, NULL);

    timer->nextdrift += drift_interval();

    record_tstamp_drift_file(nictstamp, &now, total_pkts, timer->dev_num);

    if (timer->mode == clock_mode_constant_output) {
	/* That's all we need to do for now. */
	return 0;
    }

    getTime(timer, nictstamp, &est_time, NULL);

    time = tv_to_secs(&est_time) - tv_to_secs(&now);

    /* Okay, sanity check things a bit. */
    /* XXX TODO: The most common cause of these sanity checks failing
       is the clock frequency being wrong.  If they fail consistently,
       we never get enough data to recalibrate the clock.  Hence, if
       the clock is ever seriously wrong, we can't recover.  The
       correct procedure is probably to set a bit somewhere when one
       of these fails, and then recalculate the frequency anyway, but
       continue trying to recalibrate. */

    if(time > timer->max + 0.0005)
    {
	/* This offset exceeds the largest one seen so far by more than
	   0.5 ms. */
	// bin all samples and start again
	if(timer->samples > 20)
	    fprintf(stderr, "%s: Losing %u samples - stale packets?\n",
		    timer->name, timer->samples);
	reset_timer(timer, nictstamp, &now);
	printf("<dump samples (%f > %f)>\n", time, timer->max);
	return 0;
    }
    if(time < timer->min - 0.0005)
    {
	/* This offset is below the smallest one seen so far by more
	 * than 0.5 ms. */
	// bin this sample
	timer->binned++;
	if((timer->binned % 20) == 19)
	{
	    fprintf(stderr,
		    "%s: Consistently binning samples.  Resetting data.\n",
		    timer->name);
	    reset_timer(timer, nictstamp, &now);
	}
	return 0;
    }

    /* Sample looks good, start processing it. */

    timer->binned = 0;

    tdiff = tv_to_secs(&now) - tv_to_secs(&timer->time_base);

    /* Add it to the accumulated statistics */
    timer->samples++;
    timer->sum += time;
    timer->sum2 += (time * time);
    timer->tsum += tdiff;
    timer->tsum2 += (tdiff * tdiff);
    timer->psum += tdiff * time;
    if(timer->samples == 1 || timer->min > time)
	timer->min = time;
    if(timer->samples == 1 || timer->max < time)
	timer->max = time;

    if (timer->samples < 160) {
	/* Not enough samples available for calibration, yet. */
	return 0;
    }


    /* Interesting times:

       -- We start a calibration run, system clock (A)
       -- A packet arrives, sk98 clock (B)
       -- A packet arrives, system clock (C)

       tdiff = C - A
       time = (our estimate of B) - C */
    /* The actual core calibration.  This is a fairly standard linear
       regression with x = tdiff and y = time: we model time as a
       linear function of tdiff and want to find the equation.
       (Remember that tdiff is the system clock time between
       calibration start and packet arrival, while time is the
       difference between the estimated time of packet arrival and the
       system time measured shortly after it.) */
    s_xx = timer->tsum2 -
	(timer->tsum * timer->tsum / (double) timer->samples);
    s_xy = timer->psum -
	(timer->tsum * timer->sum / (double) timer->samples);
    s_yy = timer->sum2 -
	(timer->sum * timer->sum / (double) timer->samples);
    grad = s_xy / s_xx;
    /* m stands for mean, here, rather than gradient */
    m_x = timer->tsum / (double) timer->samples;
    m_y = timer->sum / (double) timer->samples;
    inter =  m_y - (grad * m_x);

    /* We now believe that time =~= inter + grad * tdiff.  Now, at
       tdiff = 0, inter provides a pretty good estimate of the time
       between a packet getting stamped on the card and its getting a
       system time stamp.  It is therefore the amount which we should
       subtract from the time base in order to get the actual packet
       arrival times.  The gradient gives a pretty good guide to the
       error in the clock frequency. */

    /* Estimate how bad a fit we have.  Again, the interesting thing
       is the uncertainty in inter.  The residual variance is the whole
       residual sum of squares divided by the degrees of freedom, so
       the subtraction must happen before the division. */
    s2_r = (s_yy - (grad * grad * s_xx)) / (timer->samples - 2);
    uncerc2 = s2_r * (1 + 1.0 / timer->samples + m_x * m_x / s_xx);
    fprintf(stderr, "Timer uncertainty %f (r statistic %f)\n", sqrt(uncerc2),
	    s2_r);

    fprintf(stderr, "Time adjustment %f, grad %e, s_xy %e.\n", inter, grad, s_xy);

    /* Guess the nictstamp frequency from the data we've collected. */
    nictstamp_freq = ((1.0 + grad) * (double) nictstamp_freq)+0.5;

    if (grad > 0.0000001 || grad < -0.0000001) {
	/* We don't trust the calculated intercept if we've had to
	   change nictstamp_freq by more than a tiny amount, so
	   reset. */
	fprintf(stderr, "Resetting tstamp freq to %lu.\n", nictstamp_freq);
	reset_timer(timer, nictstamp, &now);
	return 0;
    }

    fprintf(stderr, "Happy with nictstamp_freq %lu.\n", nictstamp_freq);

    if(timer->max - timer->min > 0.001)
    {
	/* Strange: the offsets span more than a millisecond.
	   This may or may not indicate a problem. */
	fprintf(stderr,
		"\n\nXXXX: %s: setting timer based on shifting clock (%g ms)\n\n",
		timer->name, 1000.0 * (timer->max - timer->min));
    }
    else
    {
	fprintf(stderr,
		"%s: calibrating timer (span %g us), inter %f s.  Apparent tics = %lu (current %lu)\n",
		timer->name, 1e6 * (timer->max - timer->min),
		inter,
		(unsigned long) (((1.0 + grad) * (double) nictstamp_freq)+0.5),
		nictstamp_freq);
    }

    /* Adjust the time base as appropriate */
    secs_to_tv(&timer->time_base,
	       tv_to_secs(&timer->time_base) - inter);

    record_endrec_drift_file(&timer->time_base, timer->dev_num);
    timer->mode = clock_mode_constant_output;

    return 1;
}

/* Convert a NIC tick stamp into wall-clock time using the timer's
   current base and the global nictstamp_freq.

   timer     - calibration state; tic_last and tic_cur are advanced
   nictstamp - tick stamp of the packet
   tv        - if non-NULL, receives the time rounded to the nearest
	       microsecond
   ts        - if non-NULL, receives the time with nanosecond fields

   Any nictstamp smaller than the previous one is treated as a wrap of
   the counter, so stamps must be presented in non-decreasing order
   apart from genuine wraps. */
void getTime(clock_retimer_t *timer, unsigned long nictstamp, struct timeval *tv, struct timespec *ts)
{
    unsigned long long tdiff;
    struct timespec t;
    unsigned long long tics;

    // handle nictstamp wrap: the counter wraps modulo UINT_MAX + 1,
    // not UINT_MAX
    if(nictstamp < timer->tic_last)
	tdiff = ((unsigned long long) nictstamp + UINT_MAX + 1) - (unsigned long long) timer->tic_last;
    else
	tdiff = (unsigned long long) nictstamp - (unsigned long long) timer->tic_last;
    timer->tic_last = nictstamp;

    timer->tic_cur += tdiff;

    /* What we'd like to do is:

       double t;
       t = (timer->tic_cur - timer->tic_base) / nictstamp_freq;
       t.tv_sec = (int)t;
       t.tv_nsec = (t - (int)t) * 1000000000;

       Unfortunately, doubles don't have a big enough mantissa for
       that to be safe, so we have to fart about with fixed point
       arithmetic, like so. */
    tics = timer->tic_cur - timer->tic_base;
    t.tv_sec = tics / nictstamp_freq;
    t.tv_nsec = (1000000000ull * (tics % nictstamp_freq)) /
	nictstamp_freq;
    assert(t.tv_nsec >= 0);
    assert(t.tv_nsec < 1000000000);

    t.tv_sec += timer->time_base.tv_sec;
    t.tv_nsec += timer->time_base.tv_usec * 1000;
    if (t.tv_nsec >= 1000000000) {
	t.tv_sec ++;
	t.tv_nsec -= 1000000000;
    }
    assert(t.tv_nsec >= 0);
    assert(t.tv_nsec < 1000000000);

    /* Assert that the time we've estimated is close to the current
       time.  This will have to change if we ever use this code on
       offline traces. */
    /* It turns out this isn't a good thing to assert: we can have
       stale packets on the ring when we start up, and they tend to
       confuse things.
    */
#if 0
#ifndef NDEBUG
    {
	unsigned n;
	n = time(NULL);
	assert(t.tv_sec >= n - 60);
	assert(t.tv_sec <= n + 60);
    }
#endif
#endif

    if(tv) {
	tv->tv_sec = t.tv_sec;
	tv->tv_usec = (t.tv_nsec + 500) / 1000;
	if (tv->tv_usec >= 1000000) {
	    /* Rounding up reached a whole second: carry it forward. */
	    tv->tv_sec++;
	    tv->tv_usec -= 1000000;
	}
	assert(tv->tv_usec >= 0);
	assert(tv->tv_usec < 1000000);
    }

    if(ts) {
	memcpy(ts, &t, sizeof(t));
    }

    return;
}

/* Extract the 32-bit NIC tick stamp belonging to a packet.  The stamp
   is read from the four bytes at offsets -14..-11 relative to
   skb->data: least significant byte first on SK98, most significant
   byte first on ACENIC.  Builds with neither card defined return 0.

   Each byte is widened to unsigned long before shifting; shifting the
   promoted int would overflow for a top byte >= 0x80 and sign-extend
   into the upper half of a 64-bit result. */
unsigned long skb_to_nictstamp(const struct sk_buff *skb)
{
#if defined(SK98)
    const unsigned char *ipdata = skb->data;

    return ((unsigned long) ipdata[-11]<<24) |
	((unsigned long) ipdata[-12]<<16) |
	((unsigned long) ipdata[-13]<<8) |
	(unsigned long) ipdata[-14] ;
#elif defined(ACENIC)
    const unsigned char *ipdata = skb->data;

    return ((unsigned long) ipdata[-14]<<24) |
	((unsigned long) ipdata[-13]<<16) |
	((unsigned long) ipdata[-12]<<8) |
	(unsigned long) ipdata[-11] ;
#else
    (void) skb;
    return 0;
#endif
}

/* One-time setup of the timestamp machinery.

   do_retime    - must be non-zero; the process aborts otherwise
   initial_freq - if non-zero, replaces the default nictstamp_freq
   drift_fname  - if non-NULL, a drift file of that name is created
		  (truncating any existing file) and a text header line
		  with the host name and current time is written to it

   Exits via err() if the drift file cannot be opened. */
void initialise_timestamps(unsigned do_retime, unsigned long initial_freq,
			   const char *drift_fname)
{
    char hostname[256];
    time_t now = time(NULL);

    if (!do_retime)
	abort();
    do_clock_retime = do_retime;
    if (initial_freq != 0)
	nictstamp_freq = initial_freq;
    if (drift_fname != NULL) {
	drift_file = fopen(drift_fname, "w");
	if (drift_file == NULL)
	    err(1, "openning drift file %s", drift_fname);

	/* gethostname() may fail, and need not terminate the buffer
	   when the name is truncated; make sure we print a valid
	   string either way. */
	if (gethostname(hostname, sizeof(hostname)) != 0)
	    snprintf(hostname, sizeof(hostname), "%s", "unknown");
	hostname[sizeof(hostname) - 1] = '\0';

	fprintf(drift_file, "Drift file, %s, %s\n", hostname, ctime(&now));
    }
}
