/******************************************************************************
*                                                                             *
*   Copyright 2005 University of Cambridge Computer Laboratory.               *
*                                                                             *
*   This file is part of Nprobe.                                              *
*                                                                             *
*   Nprobe is free software; you can redistribute it and/or modify            *
*   it under the terms of the GNU General Public License as published by      *
*   the Free Software Foundation; either version 2 of the License, or         *
*   (at your option) any later version.                                       *
*                                                                             *
*   Nprobe is distributed in the hope that it will be useful,                 *
*   but WITHOUT ANY WARRANTY; without even the implied warranty of            *
*   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the             *
*   GNU General Public License for more details.                              *
*                                                                             *
*   You should have received a copy of the GNU General Public License         *
*   along with Nprobe; if not, write to the Free Software                     *
*   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA *
*                                                                             *
******************************************************************************/


#include <linux/config.h>
#include <linux/module.h>

#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/bitops.h>
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/interrupt.h>
#include <linux/in.h>
#include <linux/tty.h>
#include <linux/errno.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/skbuff.h>
#include <linux/rtnetlink.h>
#include <linux/if_arp.h>
#include <linux/if_slip.h>
#include <linux/init.h>

#include <linux/proc_fs.h>
#include <linux/fs.h>

#include "nprobe.h"

/* Version string printed by nprobe_init(). Both branches currently define
 * the same value, so the MODULE test makes no difference here. */
#ifdef MODULE
#define NETPROBE_VERSION    "0.1-iap10"
#else
#define	NETPROBE_VERSION    "0.1-iap10"
#endif


/* Size passed to dev_alloc_skb() for every buffer allocated by
 * nprobe_init(). */
#define BUFFER_SIZE (1536 + 32 + 2) /* it'll be a 2KB thing anyway...*/


/* The single shared state block; allocated and zeroed in nprobe_init(). */
static np_t * np = NULL;

/* Channel index last used by NP_netif_rx(); NP_dev_alloc_skb() starts its
 * search for a free buffer at this channel. */
volatile int hint = 0;          /* used to speed dev_alloc_skb */

/* Module parameter. In this file it is only printed (nprobe_init() and
 * proc_output()). */
int nprobe_maxdev = 1;		/* Can be overridden with insmod! */
MODULE_PARM(nprobe_maxdev, "i");

/* Forward declaration: proc_input() calls reset_fifo() before its
 * definition appears further down the file. */
static void reset_fifo();

/************************************************************************/


/* Read handler for the /proc entry.
 *
 * Formats one header line ("nprobe: ...") followed by one line per channel
 * showing the four FIFO indices, then copies at most `len` bytes of that
 * text to the user buffer `buf`.
 *
 * Returns the number of bytes copied, 0 on the call that follows a
 * successful read (end of file), or -EFAULT if the user buffer cannot be
 * written.
 *
 * The end-of-file state is kept in a function-local static, so it is shared
 * by every reader of the file.
 *
 * NOTE(review): `offset` is unused; confirm the type of the fourth parameter
 * against the `read` member of struct file_operations in the target
 * kernel's <linux/fs.h>. */
static ssize_t proc_output(  struct file *file,   /* The file read */
                         char *buf, /* The buffer to put data to (in the
                                     * user segment) */
                         size_t len, /* The length of the buffer */
			 loff_t offset)
{
  static int finished = 0;
  int i, used;
  char message[256], temp[128];

  /* len is a size_t: cast it so the argument matches the %d conversion. */
  printk("NPROBE: proc_output: fin %d len %d\n", finished, (int) len);

  /* We return 0 to indicate end of file, that we have no more information.
   * Otherwise, processes will continue to read from us in an endless loop. */
  if (finished) {
    finished = 0;
    return 0;
  }

  /* Header line. The cast makes the argument match %ld whatever integer
   * type num_bufs is declared with. */
  used = sprintf(message, "nprobe: 0x%p %d %d %ld\n",
		 np, nprobe_maxdev, FIFO_SIZE, (long) np->num_bufs);

  /* One line per channel. Each line is formatted into temp first and only
   * appended when it fits, so message[] cannot overflow however large
   * US_CHANNELS is; the report is truncated instead. */
  for (i = 0; i < US_CHANNELS; i++)
    {
      int n = sprintf(temp, "%d : tous %d,%d : frus %d,%d\n",
		      i,
		      np->x[i].tous_in, np->x[i].tous_out,
		      np->x[i].frus_in, np->x[i].frus_out );

      if (used + n >= (int) sizeof(message))
	break;

      strcpy(message + used, temp);
      used += n;
    }

  /* Copy to the caller one byte at a time, stopping at the end of the text
   * or of the caller's buffer; report a bad user pointer as -EFAULT. */
  for (i = 0; (size_t) i < len && message[i]; i++)
    if (put_user(message[i], buf + i))
      return -EFAULT;

  printk("NPROBE: proc_output: = X%sX\n", message);

  /* If the text is longer than len it is simply cut short; the next call
   * reports end of file rather than continuing where this one stopped. */
  finished = 1;

  return i;  /* Return the number of bytes "read" */
}


/* Write handler for the /proc entry.
 *
 * The data written is ignored: any write simply triggers reset_fifo().
 * Returns `length`, so the caller sees the whole write as consumed.
 *
 * `buf` points into user space and is not NUL-terminated, so it must not be
 * handed to printk("%s"); only the length is logged.
 *
 * NOTE(review): `offset` is unused; confirm the type of the fourth parameter
 * against the `write` member of struct file_operations in the target
 * kernel's <linux/fs.h>. */
static ssize_t proc_input(  struct file *file,   /* The file itself */
                        const char *buf,     /* The buffer with the input */
                        size_t length,       /* The buffer's length */
			loff_t offset)
{
  // writing to proc file causes a FIFO reset

  printk("NPROBE: proc input called, length %lu\n",
	 (unsigned long) length);

  reset_fifo();
  return length; // infinite sink
}


/* Permission hook for the /proc entry: returns 0 to allow the operation,
 * or -EACCES to refuse it.
 *
 * `op` is compared against these exact values:
 *   4 - read  (output from the kernel module): allowed for everybody
 *   2 - write (input to the kernel module): allowed only when the calling
 *       process has effective uid 0
 * Every other value is refused.
 *
 * The mode bits shown by ls -l are for reference only; this function makes
 * the decision.
 */
static int proc_perms(struct inode *inode, int op)
{
printk("NPROBE: proc_permission\n");

  switch (op)
    {
    case 4:
      /* anyone may read */
      return 0;

    case 2:
      /* only root (euid 0) may write */
      if (current->euid == 0)
	return 0;
      break;

    default:
      break;
    }

  /* anything else: access denied */
  return -EACCES;
}


/* Open handler for the /proc entry.
 *
 * Bumps the module use count, logs it, and updates the two magic words in
 * the shared np structure:
 *   - first open (count == 1): both magics are reset to NP_MAGIC;
 *   - later opens (count > 1): both magics are expected to hold NP_MAGIC2
 *     (a mismatch is only logged) and are then set to NP_MAGIC3.
 * Nothing in this file writes NP_MAGIC2 - presumably another party with
 * access to the structure does; verify against the user-space client.
 *
 * The use count is sampled once after the increment so that the log line
 * and both branches act on the same value.
 *
 * Always returns 0. */
static int proc_open(struct inode *inode, struct file *file)
{
  int opens;

  MOD_INC_USE_COUNT;

  opens = __this_module.uc.usecount.counter;

  printk("NPROBE: proc_open : now open %d times\n", opens );

#if 1 /* paranoia check (original note: "to detect copy-on-write behaviour") */
  if( opens > 1 )
    {
      printk("NPROBE: stage2 magic found %x %x\n",np->magic,np->end_magic);

      if( np->magic != NP_MAGIC2 ||  np->end_magic != NP_MAGIC2 )
	{
	  printk("NPROBE: stage2 magic was incorrect.\n");
	}

      np->magic     = NP_MAGIC3;
      np->end_magic = NP_MAGIC3;
    }
#endif

  if( opens == 1 )
    {
      // reset magics to stage1
      np->end_magic = np->magic = NP_MAGIC;
    }

  return 0;
}


/* Close handler for the /proc entry: logs the current module use count and
 * drops it by one, unless it is already zero or below. */
static void proc_close(struct inode *inode, struct file *file)
{
  printk("NPROBE: proc_close. Was open %d times\n",
	 __this_module.uc.usecount.counter);

  // Guard against a horrible situation where, after the us-client forks,
  // it tries to shut a single fd twice: never decrement past zero.
  if( __this_module.uc.usecount.counter <= 0 )
    return;

  MOD_DEC_USE_COUNT;
}



/* File operations for our proc file. This is where we place pointers
 * to all the functions called when somebody tries to do something to
 * our file. NULL means we don't want to deal with something.
 *
 * The initializer is positional: each entry lands in whichever member of
 * struct file_operations occupies that position in the kernel headers this
 * file is built against. The per-line labels below are the original
 * author's.
 * NOTE(review): confirm the member order in the target kernel's
 * <linux/fs.h>; if a member sits between open and release there (e.g. a
 * flush hook), proc_close ends up in that slot instead of release. */
static struct file_operations File_Ops_4_Our_Proc_File =
  {
    NULL,  /* lseek */
    proc_output,  /* "read" from the file */
    proc_input,   /* "write" to the file */
    NULL,  /* readdir */
    NULL,  /* select */
    NULL,  /* ioctl */
    NULL,  /* mmap */
    proc_open,    /* Somebody opened the file */
    proc_close    /* Somebody closed the file */
    /* Remaining members are not listed, so they are zero-initialized
     * (NULL when taken as pointers). */
  };



/* Inode operations for our proc file. It exists so that we have a place
 * to name the file operations structure (first entry) and the permission
 * function (last entry, proc_perms). Every other operation is left NULL.
 *
 * The initializer is positional; the per-line labels are the original
 * author's.
 * NOTE(review): confirm they match the member order of
 * struct inode_operations in the target kernel's <linux/fs.h>. */
static struct inode_operations Inode_Ops_4_Our_Proc_File =
  {
    &File_Ops_4_Our_Proc_File,
    NULL, /* create */
    NULL, /* lookup */
    NULL, /* link */
    NULL, /* unlink */
    NULL, /* symlink */
    NULL, /* mkdir */
    NULL, /* rmdir */
    NULL, /* mknod */
    NULL, /* rename */
    NULL, /* readlink */
    NULL, /* follow_link */
    NULL, /* readpage */
    NULL, /* writepage */
    NULL, /* bmap */
    NULL, /* truncate */
    proc_perms /* check for permissions */
  };

/* Name of the entry created under the proc root. */
#define PROC_NAME "nprobe"

/* Directory entry handed to proc_register() in nprobe_init() and removed
 * again, by inode number, in cleanup_module(). */
static struct proc_dir_entry Our_Proc_File = 
  {
    0, /* Inode number - left as 0 here; cleanup_module() later reads it
        * back from low_ino, so it is presumably filled in at
        * registration - TODO confirm */
    sizeof(PROC_NAME)-1, /* Length of the file name */
    PROC_NAME, /* The file name */
    S_IFREG | S_IRUGO | S_IWUSR, /* File mode - this is a regular file which 
                        * can be read by its owner, its group, and everybody
                        * else. Also, its owner can write to it.
                        *
                        * Actually, this field is just for reference, it's
                        * proc_perms that does the actual check. It 
                        * could use this field, but in our implementation it
                        * doesn't, for simplicity. */
    1,  /* Number of links (directories where the file is referenced) */
    0, 0,  /* The uid and gid for the file - we give it to root */
    0, /* The size of the file reported by ls. */
    &Inode_Ops_4_Our_Proc_File, /* A pointer to the inode operations for
                                 * the file. We need it because that is
                                 * where the read/write/open/close handlers
                                 * and the permission check are named. */
    NULL  /* The read function for the file. Irrelevant, because reads go
           * through the file operations named in the inode structure
           * above */
  }; 


/************************************************************************/


/* Put an skb's bookkeeping fields back to an "empty" state so the buffer
 * can be reused: no payload, data and tail both 16 bytes past head (the
 * same effect as skb_reserve(skb,16) on a fresh buffer), and end placed
 * truesize bytes past head. */
inline static void reset_skb( struct sk_buff * skb )
{
  skb->len  = 0;
  skb->end  = skb->head + skb->truesize;
  skb->data = skb->head + 16;
  skb->tail = skb->data;
}


static void reset_fifo( void)
{
  int i,j;
  int c[US_CHANNELS];

  // this is very scary -- should have a proper mutex around it !!!!!

  printk("NPROBE: reset_fifo with %d bufs.\n",np->num_bufs);
  
  cli();

  for(i=0;i<US_CHANNELS;i++)
    c[i] = 0;

  for(i=0, j=0; i<np->num_bufs; i++, j=(j+1) % US_CHANNELS )
    {
      int x;
      x = c[j]++;
      np->frus_skb[j][x] = np->bufs[i];      
    }
  
  for(i=0;i<US_CHANNELS;i++)
    {
      np->x[i].frus_in = c[i];
      np->x[i].tous_in = np->x[i].tous_out = np->x[i].frus_out = 0;
    }
  
  sti();

  for(i=0;i<US_CHANNELS;i++)
    {
      printk("NPROBE: [%d] reset_fifo tous %d,%d : frus %d,%d\n",
	     i,
	     np->x[i].tous_in, np->x[i].tous_out,
	     np->x[i].frus_in, np->x[i].frus_out );
    }


}
  

/* One-time setup.
 *
 * Allocates and zeroes the shared np structure, stamps both magic words
 * with NP_MAGIC, initializes each channel's spinlock, fills the FIFO slot
 * arrays with recognizable dummy pointers, allocates up to FIFO_SIZE
 * buffers of BUFFER_SIZE bytes, distributes them with reset_fifo(), and
 * registers the /proc entry.
 *
 * Running short of buffers or failing to register the /proc entry is only
 * logged. Returns -ENOMEM if the np structure cannot be allocated;
 * otherwise 0 when built as a module, or ENODEV when built in (see the
 * comment at the return). */
#ifdef MODULE
static int nprobe_init(void)
#else	/* !MODULE */
__initfunc(int nprobe_init(struct device *dummy))
#endif	/* !MODULE */
{
    int status = 0;
    int chan, slot, got;

    printk(KERN_INFO "NPROBE: version %s, max=%d.\n",
	   NETPROBE_VERSION, nprobe_maxdev );

    np = (np_t *) kmalloc( sizeof( *np ), GFP_KERNEL);
    if ( !np )
    {
	printk("NPROBE: Can't allocate memory!\n");
	return -ENOMEM;
    }

    printk("NPROBE: allocated structure at %p\n",np );

    memset( np, 0, sizeof( *np ) );

    np->magic     = NP_MAGIC;
    np->end_magic = NP_MAGIC;

    for ( chan = 0; chan < US_CHANNELS; chan++ )
	spin_lock_init(&(np->x[chan].lock));

    // paranoia code: give every FIFO slot a recognizable dummy value
    for ( chan = 0; chan < US_CHANNELS; chan++ )
    {
	for ( slot = 0; slot < FIFO_SIZE; slot++ )
	{
	    np->frus_skb[chan][slot] = (struct sk_buff *) 0x05555555;
	    np->tous_skb[chan][slot] = (struct sk_buff *) 0x0aaaaaaa;
	}
    }

    /* grab as many buffers as we can, up to FIFO_SIZE */
    got = 0;
    while ( got < FIFO_SIZE )
    {
	struct sk_buff *skb = dev_alloc_skb( BUFFER_SIZE );

	if ( !skb )
	{
	    printk("NPROBE: Only able to allocate %d buffers.\n",got );
	    break;
	}

	np->bufs[got] = skb;
	got++;
    }

    np->num_bufs = got;
    printk("NPROBE: Got %d buffers.\n",got );

    reset_fifo(); // setup pointers etc.

    /* get us a /proc entry */
    if ( proc_register(&proc_root, &Our_Proc_File) )
	printk("NPROBE: Unable to register proc entry.\n" );

#ifdef MODULE
    return status;
#else
    /* Return "not found", so that dev_init() will unlink
     * the placeholder device entry for us.
     */
    return ENODEV;
#endif
}



#ifdef MODULE

/* Module load entry point (compiled only when MODULE is defined):
 * delegates to nprobe_init() and passes its return value through. */
int
init_module(void)
{
	return nprobe_init();
}

/* Module unload entry point.
 *
 * Removes the /proc entry first, so that no read/write/open handler can be
 * entered once the state it dereferences has gone. It then frees every
 * buffer recorded in np->bufs[], and finally frees the np structure itself,
 * clearing the pointer so a stale value cannot be used afterwards. */
void
cleanup_module(void)
{
  int i;

  printk(KERN_INFO "NPROBE: Unload called.\n");

  /* Unhook the user-visible interface before tearing down its data. */
  proc_unregister(&proc_root, Our_Proc_File.low_ino);

  for ( i=0; i<np->num_bufs; i++ )
    {
      dev_kfree_skb( np->bufs[i] );
    }

  kfree(np);
  np = NULL;
}
#endif /* MODULE */



inline int hash( struct sk_buff * skb )
{
  u32 ports;

  //return (skb->data[23]&1);  // good for debug with flood ping stream

  // simple hash function that will probably work pretty well:
  // xor together the lsbs of the tcp/udp port info.

  // skip the IP header and look at tcp/udp port info
  // NB potential for unaligned access!

  ports = *(((unsigned long*)skb->data) + 5 ) ;

  // assume a Littel Endian machine

  return ( (ports>>8) ^ (ports>>24) ) % US_CHANNELS ;

}




/*
 *	Receive a packet from a device driver and queue it on the to-user
 *	FIFO of the channel chosen by hash().
 *
 *	The skb is timestamped with xtime. If the channel's FIFO already
 *	holds FIFO_SIZE entries the packet is logged as dropped and the skb
 *	is released with kfree_skb(). In both cases `hint` is set to the
 *	channel afterwards.
 *
 *	NOTE(review): if the skb came from this module's own pool it is still
 *	listed in np->bufs[] after kfree_skb() - confirm that path is
 *	acceptable.
 */

void NP_netif_rx(struct sk_buff *skb)
{
  int chan;

  skb->stamp = xtime;	// XXX use RPCC for greater accuracy

  chan = hash( skb );

#if 1
  /* periodic progress report, once every 8192*8 packets on this channel
   * (the indices are read without holding the lock) */
  if( (np->x[chan].tous_in % (8192*8)) == 0 )
    printk("NPROBE: [%d] dev_alloc %p tous %d,%d : frus %d,%d\n",
	   chan,
	   np->frus_skb[chan][ np->x[chan].frus_out % FIFO_SIZE ],
	   np->x[chan].tous_in, np->x[chan].tous_out,
	   np->x[chan].frus_in, np->x[chan].frus_out );
#endif

  spin_lock( &np->x[chan].lock );

  if( np->x[chan].tous_in - np->x[chan].tous_out >= FIFO_SIZE )
    {
      printk(
	     "NPROBE: ToUs FIFO full! - tous %d,%d : frus %d,%d \n",
	     np->x[chan].tous_in, np->x[chan].tous_out,
	     np->x[chan].frus_in, np->x[chan].frus_out );

      // this _really_ shouldn't happen. Free the skbuff anyway, but the
      // reality is that we're probably going to go down. hard.
      kfree_skb(skb);
    }
  else
    {
      // store the pointer first, then publish it by advancing tous_in
      np->tous_skb[chan][ np->x[chan].tous_in % FIFO_SIZE ] = skb;
      np->x[chan].tous_in++;   // XXX should really ensure ordering!!!!
    }

  spin_unlock( &np->x[chan].lock );

  hint = chan;          // remember where to look for buffers first
}


/*
 * Replacement for freeing an skb; per the original note it is called when
 * the network card shuts down. Deliberately does nothing: the buffer is
 * left alone, and reset_fifo() puts every buffer listed in np->bufs[] back
 * into circulation the next time it runs.
 */

void NP_dev_kfree_skb( struct sk_buff * skb )
{
  return;
}

// Take the next buffer off channel `chan`'s from-user (return) FIFO.
// Returns NULL when that FIFO is empty; otherwise advances frus_out and
// returns the buffer after reset_skb() has been applied to it.
// Assumes the caller holds the channel's lock.
static inline struct sk_buff * get_skb( int chan )
{
  struct sk_buff *skb;

  // nothing available on the return fifo: fail
  if( np->x[chan].frus_in - np->x[chan].frus_out <= 0 )
    return NULL;

  skb = np->frus_skb[chan][ np->x[chan].frus_out % FIFO_SIZE ];

  np->x[chan].frus_out++;   // XXX should really ensure ordering!!!!

  reset_skb( skb );

  return skb;
}

/* Hand out a recycled buffer in place of a fresh allocation.
 *
 * Channels are tried one after another, starting at the channel recorded in
 * `hint` and wrapping around; each channel's lock is held only while its
 * return FIFO is inspected. Returns the first buffer found, or NULL when
 * every channel is empty.
 *
 * `length` is not used: the buffer returned has whatever size it was
 * originally allocated with.
 * NOTE(review): confirm callers never ask for more than that. */
struct sk_buff * NP_dev_alloc_skb(unsigned int length)
{
  struct sk_buff *skb;
  const int first = hint;      // look at hint for where to try first
  int chan = first;

  for(;;)
    {
      spin_lock( &np->x[chan].lock );
      skb = get_skb(chan);
      spin_unlock( &np->x[chan].lock );

      if( skb != NULL )
	break;

      chan = (chan+1) % US_CHANNELS;

      if( chan == first )
	break;      // been all the way round: nothing anywhere
    }

  // on failure skb is NULL here; ifconfig stats will tell us about drops
  return skb;
}


/* Swap a received buffer for an empty one.
 *
 * recv_skb is timestamped with xtime and assigned to a channel by hash().
 * The function then tries to obtain an empty buffer - first from that
 * channel's return FIFO, then from the other channels' - and, if it gets
 * one, queues recv_skb on the channel's to-user FIFO.
 *
 * Returns the buffer the caller should use next:
 *   - the new empty buffer, when recv_skb was handed on; or
 *   - recv_skb itself, reset to empty (its contents are dropped), when no
 *     spare buffer exists anywhere or the to-user FIFO is already full.
 *
 * Locking: at most one channel lock is held at any time. The channel's own
 * lock is released before other channels are searched, so two CPUs working
 * on different channels cannot each wait for the lock the other holds.
 *
 * `length` is not used. */
struct sk_buff *  NP_alloc_and_pass_up(struct sk_buff * recv_skb,
				       unsigned int length )
{
  struct sk_buff * new_skb;
  int chan;
  int i;

  recv_skb->stamp = xtime;	// XXX use RPCC for greater accuracy

  chan = hash( recv_skb );

#if 1
  /* periodic progress report (indices read without holding the lock) */
  if(np->x[chan].tous_in % (8192*8) == 0 )
    printk("NPROBE: [%d] alloc_and_up %p tous %d,%d : frus %d,%d\n",
	   chan,
	   np->frus_skb[chan][ np->x[chan].frus_out % FIFO_SIZE ],
	   np->x[chan].tous_in, np->x[chan].tous_out,
	   np->x[chan].frus_in, np->x[chan].frus_out );
#endif

  spin_lock( &np->x[chan].lock );

  /* If there is no room to pass the packet up, do not take a spare buffer
   * at all: drop the packet and let the caller reuse recv_skb, so no
   * buffer goes out of circulation. */
  if( np->x[chan].tous_in - np->x[chan].tous_out >= FIFO_SIZE )
    {
      printk(
	     "NPROBE: ToUs FIFO full! - tous %d,%d : frus %d,%d \n",
	     np->x[chan].tous_in, np->x[chan].tous_out, np->x[chan].frus_in, np->x[chan].frus_out );

      spin_unlock( &np->x[chan].lock );

      reset_skb(recv_skb);
      return recv_skb;
    }

  new_skb = get_skb(chan);

  if( new_skb )
    goto pass_up;               /* still holding this channel's lock */

  /* Our own return FIFO is empty. Let go of our lock before touching any
   * other channel's lock. */
  spin_unlock( &np->x[chan].lock );

  for(i=(chan+1) % US_CHANNELS; i!=chan; i = (i+1) % US_CHANNELS )
    {
      spin_lock( &np->x[i].lock );
      new_skb = get_skb(i);
      spin_unlock( &np->x[i].lock );

      if ( new_skb ) break;
    }

  if( ! new_skb )
    {
      // couldn't get a new skb anywhere: refresh the old one and return it
      reset_skb(recv_skb);
      return recv_skb;
    }

  spin_lock( &np->x[chan].lock );

pass_up:
  // we got a new skb, pass the old one up

  if( np->x[chan].tous_in - np->x[chan].tous_out < FIFO_SIZE )
    {
      // store the pointer first, then publish it by advancing tous_in
      np->tous_skb[chan][ np->x[chan].tous_in % FIFO_SIZE ] = recv_skb;
      np->x[chan].tous_in++;   // XXX should really ensure ordering!!!!
    }
  else
    {
      // Only reachable if the FIFO filled up while our lock was released
      // during the search above. recv_skb is not queued and not returned;
      // it remains listed in np->bufs[] and is put back into circulation
      // by the next reset_fifo().
      printk(
	     "NPROBE: ToUs FIFO full! - tous %d,%d : frus %d,%d \n",
	     np->x[chan].tous_in, np->x[chan].tous_out, np->x[chan].frus_in, np->x[chan].frus_out );
    }

  spin_unlock( &np->x[chan].lock );
  return new_skb;
}



















