/******************************************************************************
 * acenic.c
 * 
 * All modifications copyright (c) 1999-2000, University of Cambridge,
 * by Ian Pratt and Keir Fraser (iap10@cl.cam.ac.uk, kaf24@cl.cam.ac.uk)
 * 
 * Please note the original copyright and licence below.
 */

/*
 * acenic.c: Linux driver for the Alteon AceNIC Gigabit Ethernet card
 *           and other Tigon based cards.
 *
 * Copyright 1998 by Jes Sorensen, <Jes.Sorensen@cern.ch>.
 *
 * Thanks to Alteon and 3Com for providing hardware and documentation
 * enabling me to write this driver.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * Additional work by Pete Wyckoff <wyckoff@ca.sandia.gov> for initial
 * Alpha and trace dump support.
 */

/* 
 * Alterations made for use with the Arsenic User-Safe Device project
 * by Tim Deegan <tjd21@cl.cam.ac.uk> starting in April 1999.
 * Further modifications by Ian Pratt and Keir Fraser (kaf24@cl.cam.ac.uk).
 * 
 * $Revision: 1.13 $
 * $Date: 2000/04/13 16:30:31 $
 */

#include <linux/stddef.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/errno.h>
#include <linux/ioport.h>
#include <linux/pci.h>
#include <linux/kernel.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/skbuff.h>
#include <linux/init.h>
#include <linux/delay.h>
#include <linux/mm.h>
#include <net/sock.h>
#include <net/ip.h>

#include <asm/system.h>
#include <asm/bitops.h>
#include <asm/io.h>
#include <asm/irq.h>
#include <asm/byteorder.h>
#include <asm/uaccess.h>

#include "acenic.h"
#include "acenic_proto.h"
#include "acenic_usd.h"

/* Uncomment the next line to build in the watch timer trace. */
//#define INCLUDE_WATCH_TIMER

/* These must be defined before the firmware is included. */
/* Parenthesised so each limit expands as a single value in any expression. */
#define MAX_TEXT_LEN	(96*1024)
#define MAX_RODATA_LEN	(8*1024)
#define MAX_DATA_LEN	(2*1024)

#include "alt_fw2.h"

/*
 * Fallback PCI vendor/device IDs, used only when the headers included
 * above have not already defined them.  acenic_probe() matches boards
 * against these values.
 */

/* Alteon: vendor ID and the AceNIC device ID (defined together). */
#ifndef PCI_VENDOR_ID_ALTEON
#define PCI_VENDOR_ID_ALTEON		0x12ae	
#define PCI_DEVICE_ID_ALTEON_ACENIC	0x0001
#endif
/*
 * 3Com 3C985 device ID.  PCI_VENDOR_ID_3COM itself is not defined in this
 * file -- presumably it comes from <linux/pci.h>; TODO confirm.
 */
#ifndef PCI_DEVICE_ID_3COM_3C985
#define PCI_DEVICE_ID_3COM_3C985	0x0001
#endif
/* NetGear: vendor ID and the GA620 device ID (defined together). */
#ifndef PCI_VENDOR_ID_NETGEAR
#define PCI_VENDOR_ID_NETGEAR		0x1385
#define PCI_DEVICE_ID_NETGEAR_GA620	0x620a
#endif


/*
 * Default values for tuning parameters.  ace_init() writes these to the
 * NIC's Tune* / TxBufRat registers before applying any per-board overrides.
 * Compound expressions are parenthesised so they expand as a single value.
 */
#define DEF_TX_RATIO	31
#define DEF_TX_COAL	(TICKS_PER_SEC / 500)
#define DEF_TX_MAX_DESC	7
#define DEF_RX_COAL	(TICKS_PER_SEC / 10000)
#define DEF_RX_MAX_DESC	2
#define DEF_TRACE	0
#define DEF_STAT	(2 * TICKS_PER_SEC)

/*
 * Per-board tuning overrides, one slot for each of up to 8 boards.
 * ace_init() indexes these arrays with its board_idx argument.  For the
 * coalescing/descriptor/trace arrays a zero entry leaves the DEF_* default
 * in place; tx_ratio is written whenever its value is in the range 0..63;
 * link is a bit mask of link options decoded in ace_init().
 */
static int link[8] = {0, };
static int trace[8] = {0, };
static int tx_coal_tick[8] = {0, };
static int rx_coal_tick[8] = {0, };
static int max_tx_desc[8] = {0, };
static int max_rx_desc[8] = {0, };
static int tx_ratio[8] = {0, };

/* Version banner; acenic_probe() prints it once when a board is found. */
static const char __initdata *version = 
"acenic.c: v0.32_usd ("__DATE__" "__TIME__").\n";

/*
 * Head of the list of boards set up by ace_init(); entries are chained
 * through ace_private.next and walked by cleanup_module().
 */
static struct net_device *root_dev = NULL;

/* Bit 0 is set on the first call to acenic_probe() to prevent re-probing. */
static int probed __initdata = 0;

/* Channel-0 receive/transmit callbacks installed by ace_load_rx2_ring(). */
static void ace_rx2_int(struct net_device *dev);
static void ace_tx2_int(struct net_device *dev);

/* Defined outside this part of the file. */
static int ace_install_filter(struct net_device *dev, char *filter, int len);


/******************************************************************************
 **** FIND AND INITIALISE ALL ACENIC BOARDS IN SYSTEM
 */

/*
 * acenic_probe() - find and initialise every supported board on the PCI bus.
 *
 * Walks all PCI devices of class "ethernet".  For each Alteon AceNIC,
 * 3Com 3C985 or NetGear GA620 it allocates an etherdev, fills in the
 * net_device method pointers, maps the register window and calls
 * ace_init() to bring the board up.
 *
 * @dev: ignored on entry; it is overwritten for every PCI device examined.
 *
 * Returns a negative errno if probing is refused (already probed / no PCI)
 * or the private area cannot be allocated.  Otherwise, when built as a
 * module, returns the number of boards successfully initialised; when
 * built in, returns 0 if at least one board came up and -ENODEV if none.
 */
int __init acenic_probe (struct net_device *dev)
{
    int boards_found = 0;
    int version_disp = 0;
    struct ace_private *ap;
    u8 pci_latency;
    struct pci_dev *pdev = NULL;
   
    printk("Arsenic Application-Accessible Networking driver\n");
    printk("for Alteon ACEnic 1000baseSX (Tigon II rev 5, 1MB SRAM reqd)\n");
    printk("Original driver modified by Ian Pratt and Keir Fraser\n");
    printk(" ({Ian.Pratt,Keir.Fraser}@cl.cam.ac.uk\n");
    printk("All modifications by the above authors are:\n");
    printk("Copyright (c) 1999-2000 University of Cambridge, UK\n");

    /*
     * We don't probe if there's no PCI support, or if probing done already.
     */
    if ( test_and_set_bit(0, &probed) || !pci_present() ) return -ENODEV; 

    while ( (pdev = pci_find_class(PCI_CLASS_NETWORK_ETHERNET<<8, pdev)) )
    {
        dev = NULL;

        /* Skip anything that is not one of the three supported boards. */
        if ( !(((pdev->vendor == PCI_VENDOR_ID_ALTEON) &&
                (pdev->device == PCI_DEVICE_ID_ALTEON_ACENIC)) ||
               ((pdev->vendor == PCI_VENDOR_ID_3COM) &&
                (pdev->device == PCI_DEVICE_ID_3COM_3C985)) ||
               ((pdev->vendor == PCI_VENDOR_ID_NETGEAR) &&
                (pdev->device == PCI_DEVICE_ID_NETGEAR_GA620))) ) continue;

        /* Display version info the first time an adapter is found. */
        if ( !version_disp )
        {
            version_disp = 1;
            /* Never hand a non-literal string to printk as the format. */
            printk("%s", version);
        }

        printk(__FILE__ ": acenic_probe() at address %p\n", acenic_probe);

/*DBG*/ printk(__FILE__ ": calling init_etherdev(NULL, %#x)\n", 
               (unsigned int)sizeof(struct ace_private));
        dev = init_etherdev(dev, sizeof(struct ace_private));
/*DBG*/ printk(__FILE__ ": init_etherdev() returned %#x\n", (u32)dev);

        if ( dev == NULL )
        {
            printk(KERN_ERR "Unable to allocate etherdev structure\n");
            break;
        }

        /* Allocate the private area ourselves if init_etherdev() did not. */
        if ( (dev->priv == NULL) &&
             (dev->priv = kmalloc(sizeof(*ap), GFP_KERNEL)) == NULL )
        {
            return(-ENOMEM);
        }

        ap = dev->priv;
        ap->pdev         = pdev;
        ap->vendor       = pdev->vendor;
        ap->in_interrupt = 0;
        ap->harvest_lock = 0;
        ap->filter_lock  = 0;
        spin_lock_init(&ap->lock);
        
        dev->irq                = pdev->irq;
        dev->open               = ace_open;
        dev->hard_start_xmit    = ace_start_xmit;
        dev->stop               = ace_close;
        dev->get_stats          = ace_get_stats;
        dev->set_multicast_list = ace_set_multicast_list;
#ifdef ACENIC_DEBUGGING_IOCTLS
        dev->do_ioctl           = ace_do_ioctl;
#endif
        dev->set_mac_address    = ace_set_mac_addr;
        dev->change_mtu         = ace_change_mtu;

        /*
         * Dummy value.
         */
        dev->base_addr = 42;
                
        pci_read_config_word(pdev, PCI_COMMAND, &ap->pci_command);
        pci_read_config_byte(pdev, PCI_LATENCY_TIMER, &pci_latency);

        /* Raise the PCI latency timer to at least 0x40 clocks. */
        if ( pci_latency <= 0x40 )
        {
            pci_latency = 0x40;
            pci_write_config_byte(pdev, PCI_LATENCY_TIMER, pci_latency);
        }

        pci_set_master(pdev);

        switch( ap->vendor )
        {
        case PCI_VENDOR_ID_ALTEON:
            sprintf(ap->name, "AceNIC Gigabit Ethernet");
            printk(KERN_INFO "%s: Alteon AceNIC ", dev->name);
            break;
        case PCI_VENDOR_ID_3COM:
            sprintf(ap->name, "3Com 3C985 Gigabit Ethernet");
            printk(KERN_INFO "%s: 3Com 3C985 ", dev->name);
            break;
        case PCI_VENDOR_ID_NETGEAR:
            sprintf(ap->name, "NetGear GA620 Gigabit Ethernet");
            printk(KERN_INFO "%s: NetGear GA620 ", dev->name);
            break;
        default:
            sprintf(ap->name, "Unknown AceNIC based Gigabit Ethernet");
            printk(KERN_INFO "%s: Unknown AceNIC ", dev->name);
            break;
        }
        printk("Gigabit Ethernet at 0x%08lx, irq %i, PCI latency %i "
               "clks\n", pdev->resource[0].start, dev->irq, pci_latency);


        /*
         * Remap the regs into kernel space.
         */
        ap->regs = (struct ace_regs *)__ioremap(pdev->resource[0].start,
                                                0x4000, _PAGE_USER);
        if ( !ap->regs )
        {
            /*
             * NOTE(review): this abandons the scan and leaves the etherdev
             * allocated above in place -- confirm that is intended.
             */
            printk(KERN_ERR "%s:  Unable to map I/O register, "
                   "AceNIC %i will be disabled.\n",
                   dev->name, boards_found);
            break;
        }


        /* Module parameters are indexed per board only in a module build. */
#ifdef MODULE
        if ( ace_init(dev, boards_found) ) continue;
#else
        if (ace_init(dev, -1)) continue;
#endif

        boards_found++;

        /*
         * Ugly, but we need to tell the net-init code that it
         * shall go for the next device.
         */
        dev->base_addr = 0;
    }

    /*
     * If we're at this point we're going through ace_probe() for the first
     * time.  Return success (0) if we've initialized 1 or more boards.
     * Otherwise, return failure (-ENODEV).
     */

    if ( dev ) printk("%s: returning from probe with %i boards\n", 
                      dev->name, boards_found);

#ifdef MODULE
    return(boards_found);
#else
    if ( boards_found == 0 ) return(-ENODEV);
    return 0;
#endif
}



/******************************************************************************
 **** MODULE SETUP / CLEARUP
 */

#ifdef MODULE
/*
 * Module metadata and load-time parameters.  Each parameter is an array
 * of 1 to 8 ints, one entry per board, matching the static arrays that
 * ace_init() indexes by board number.
 */
#if LINUX_VERSION_CODE > 0x20118
MODULE_AUTHOR("Jes Sorensen <Jes.Sorensen@cern.ch>");
MODULE_DESCRIPTION("AceNIC/3C985 Gigabit Ethernet driver");
MODULE_PARM(link, "1-" __MODULE_STRING(8) "i");
MODULE_PARM(trace, "1-" __MODULE_STRING(8) "i");
MODULE_PARM(tx_coal_tick, "1-" __MODULE_STRING(8) "i");
MODULE_PARM(max_tx_desc, "1-" __MODULE_STRING(8) "i");
MODULE_PARM(rx_coal_tick, "1-" __MODULE_STRING(8) "i");
MODULE_PARM(max_rx_desc, "1-" __MODULE_STRING(8) "i");
/* ace_init() reads tx_ratio[] per board, so it must be settable too. */
MODULE_PARM(tx_ratio, "1-" __MODULE_STRING(8) "i");
#endif

/*
 * init_module() - module entry point: probe for boards.
 *
 * Returns 0 if acenic_probe() brought up at least one board, otherwise
 * -ENODEV.  acenic_probe() may return a negative errno as well as a board
 * count, so anything that is not a positive count is treated as failure.
 */
int init_module(void)
{
    /* Block-scope declaration so this function does not rely on a header. */
    extern void cleanup_module(void);
    int cards;
    
    root_dev = NULL;

    cards = acenic_probe(NULL);
    if ( cards <= 0 )
    {
        printk(__FILE__ ": init_module returning -ENODEV\n");
        /*
         * ace_init() puts a board on the root_dev list (with its IRQ
         * already requested) before the firmware is started, so boards
         * that failed later may still be listed.  Release them here,
         * because cleanup_module() is not run when init_module() fails.
         */
        cleanup_module();
        return -ENODEV;
    }

    return 0;
}

void cleanup_module(void)
{
    struct ace_private *ap;
    struct ace_regs *regs;
    struct net_device *next;
    unsigned long flags;
    int i;

    while (root_dev)
    {
/*DBG*/	printk(__FILE__ ": cleaning up %s\n", root_dev->name);
        next = ((struct ace_private *)root_dev->priv)->next;
        ap = (struct ace_private *)root_dev->priv;

        regs = ap->regs;

/*DBG*/	printk(__FILE__ ":   -> unregistering device\n");
        unregister_netdev(root_dev);

/*DBG*/	printk(__FILE__ ":   -> freeing IRQ\n");
        free_irq(root_dev->irq, root_dev);

/*DBG*/	printk(__FILE__ ":   -> halting card CPUs\n");
        spin_lock_irqsave(&ap->lock, flags);                
	writel(1, &regs->DmaWriteState); // pause DMAs
	writel(1, &regs->DmaReadState);
        writel(readl(&regs->CpuCtrl) | CPU_HALT, &regs->CpuCtrl);
        writel(readl(&regs->CpuBCtrl) | CPU_HALT, &regs->CpuBCtrl);
        writel(0, &regs->Mb[0].lo);
        spin_unlock_irqrestore(&ap->lock, flags);

        printk(__FILE__ ":   -> freeing TX skbuffs\n");
	for (i = 0; i < TX2_RING_ENTRIES; i++) 
        {
            if ( ap->c0_tx2_skbuff[i] ) 
            {
                ap->chan0.tx2_ring[i].buf = 0;
                ap->chan0.tx2_ring[i].flagsize = 0;
                dev_kfree_skb(ap->c0_tx2_skbuff[i]);
                ap->c0_tx2_skbuff[i] = 0;
            }
	}

        printk(__FILE__ ":   -> freeing RX skbuffs\n");
	for (i = 0; i < RX2_RING_ENTRIES; i++) 
        {
            if ( ap->c0_rx2_skbuff[i] ) 
            {
                ap->chan0.rx2_ring[i].buf = 0;
                ap->chan0.rx2_ring[i].flagsize = 0;
                dev_kfree_skb(ap->c0_rx2_skbuff[i]);
                ap->c0_rx2_skbuff[i] = 0;
            }
	}

/*DBG*/	printk(__FILE__ ":   -> unmapping IO space\n");
        iounmap(regs);
        if ( ap->trace_buf ) kfree(ap->trace_buf);

/*DBG*/	printk(__FILE__ ":   -> freeing gen_info area\n");
        if ( ap->info ) kfree(ap->info);

/*DBG*/	printk(__FILE__ ":   -> freeing device state\n");
        kfree(root_dev);

/*DBG*/	printk(__FILE__ ": done\n");

        root_dev = next;
    }
}
#endif


/******************************************************************************
 **** COMMAND MECHANISM
 */

/*
 * Put the current task to sleep for HZ/10 ticks (100ms), then mark it
 * runnable again.  Wrapped in do { } while (0) so the macro behaves as a
 * single statement, including in front of an `else`.
 */
#define WAIT_100()                                \
do {                                              \
    current->state = TASK_INTERRUPTIBLE;          \
    schedule_timeout(HZ/10); /* wait 100ms */     \
    current->state = TASK_RUNNING;                \
} while (0)

/*
 * Queue one argument-less command on the NIC command ring.
 *
 * Sleeps in 100ms steps while advancing the producer index would make it
 * equal to the consumer index, then stores the first 32-bit word of <cmd>
 * in the current producer slot and publishes the new producer index.
 */
static inline void ace_issue_cmd(struct ace_regs *regs, struct cmd *cmd)
{
    u32 slot = readl(&regs->CmdPrd);
    u32 next = (slot + 1) % CMD_RING_ENTRIES;

    while ( readl(&regs->CmdCsm) == next )
        WAIT_100();

    writel(*(u32 *)(cmd), &regs->CmdRng[slot]);
    writel(next, &regs->CmdPrd);
}

/*
 * Similar to ace_issue_cmd(), but also appends <arglen> 32-bit argument
 * words, taken from <args>, to the command ring after the command word.
 * The producer index is only published once everything has been written.
 */
static inline void ace_issue_cmd_with_args(struct ace_regs *regs,
                                           struct cmd *cmd,
                                           u32 *args, int arglen)
{
    u32 *word = args;
    u32 slot  = readl(&regs->CmdPrd);
    u32 next  = (slot + 1) % CMD_RING_ENTRIES;

    /* Wait for room, then place the command word itself. */
    while ( readl(&regs->CmdCsm) == next )
        WAIT_100();
    writel(*(u32 *)(cmd), &regs->CmdRng[slot]);

    /* Each argument takes the next slot, again waiting for room first. */
    for ( ; arglen != 0; arglen-- )
    {
        slot = next;
        next = (slot + 1) % CMD_RING_ENTRIES;
        while ( readl(&regs->CmdCsm) == next )
            WAIT_100();
        writel(*word++, &regs->CmdRng[slot]);
    }

    writel(next, &regs->CmdPrd);
}



/******************************************************************************
 **** FIRMWARE AND INTERFACE INITIALISATION (FOR A GIVEN ACENIC BOARD)
 */

/*
 * ace_init() - bring one board from reset to running firmware.
 *
 * Resets and halts both NIC CPUs, reads the MAC address from the EEPROM,
 * requests the IRQ, allocates the shared info block (with connection
 * zero's state directly behind it), loads the firmware, programs the
 * tuning and link registers, starts the NIC CPU and waits up to three
 * seconds for the firmware to report that it is running.  Finally the
 * channel-0 receive ring is loaded.
 *
 * @dev:       device whose ->priv (struct ace_private) already has ->regs
 *             mapped and ->pci_command filled in.
 * @board_idx: index into the per-board module parameter arrays; a negative
 *             value (or one >= 8) means "use the defaults only".
 *
 * Returns 0 on success, or:
 *   -ENODEV  HostCtrl does not identify a Tigon 2;
 *   -EAGAIN  the IRQ is busy or the info block could not be allocated;
 *   -ENOMEM  the PCI performance test area could not be allocated;
 *   -EBUSY   the firmware did not report running within 3 seconds;
 *   or whatever ace_load_rx2_ring() returns on failure.
 *
 * Once the device has been linked onto root_dev, later failure paths leave
 * the IRQ and info block in place for cleanup_module() to release.
 */
static int __init ace_init(struct net_device *dev, int board_idx)
{
	struct ace_private *ap;
	struct ace_regs *regs;
	struct ace_info *info;
	struct ace_connection_info *con_zero;
	u32 tig_ver, mac1, mac2, tmp;
	unsigned long tmp_ptr, myjif;
	int i;
	char * perf_test_area = NULL;

	ap = dev->priv;
	regs = ap->regs;

        /* Might as well initialise the packet filter engine here. */
        dpf_init();

	/* XXX Don't access any other registers before this point! */
#ifdef __BIG_ENDIAN
	writel(((BYTE_SWAP | WORD_SWAP | CLR_INT) |
		((BYTE_SWAP | WORD_SWAP | CLR_INT) << 24)),
	       &regs->HostCtrl);
#else
	writel((CLR_INT | WORD_SWAP | ((CLR_INT | WORD_SWAP) << 24)),
	       &regs->HostCtrl);
#endif

	mb();

	/* The top nibble of HostCtrl carries the chip version. */
	if ( (tig_ver = readl(&regs->HostCtrl) >> 28) != 6 )
        {
            /* We only support Tigon 2, as we depend on two CPUs. */
            printk(KERN_INFO"  Unsupported Tigon version detected (%i)\n",
                   tig_ver);
            return(-ENODEV);
        }
        printk(__FILE__ ": Tigon II (Rev. %i), Firmware: %i.%i.%i\n",
               tig_ver, tigon2FwReleaseMajor, tigon2FwReleaseMinor,
               tigon2FwReleaseFix);

	/* Stop the NIC CPUs and clear pending interrupts. */
	writel(1, &regs->DmaWriteState); // pause DMAs
	writel(1, &regs->DmaReadState);
	writel(1, &regs->CpuCtrl);       // smack reset
	writel(CPU_HALT, &regs->CpuCtrl);
	writel(0, &regs->Mb[0].lo);
        writel(1, &regs->CpuBCtrl);
        writel(CPU_HALT, &regs->CpuBCtrl);
        writel(SRAM_BANK_512K, &regs->LocalCtrl);
        writel(SYNC_SRAM_TIMING, &regs->MiscCfg);
        ap->version = 2;

	/*
	 * ModeStat _must_ be set after the SRAM settings as this change
	 * seems to corrupt the ModeStat and possible other registers.
	 * The SRAM settings survive resets and setting it to the same
	 * value a second time works as well. This is what caused the
	 * `Firmware not running' problem on the Tigon II.
	 */
#ifdef __LITTLE_ENDIAN
	writel(ACE_BYTE_SWAP_DATA | ACE_WARN | ACE_FATAL |
	       ACE_WORD_SWAP | ACE_NO_JUMBO_FRAG, &regs->ModeStat);
#else
#error "this driver doesn't run on big-endian machines yet!"
#endif

	/*
	 * Read eight EEPROM bytes starting at offset 0x8c into two 32-bit
	 * words, most significant byte first.  The six MAC address bytes
	 * are the low two bytes of mac1 followed by all four of mac2.
	 */
	mac1 = 0;
	for(i = 0; i < 4; i++){
		mac1 = mac1 << 8;
		mac1 |= read_eeprom_byte(regs, 0x8c+i);
	}
	mac2 = 0;
	for(i = 4; i < 8; i++){
		mac2 = mac2 << 8;
		mac2 |= read_eeprom_byte(regs, 0x8c+i);
	}

	writel(mac1, &regs->MacAddrHi);
	writel(mac2, &regs->MacAddrLo);

	printk(__FILE__ ": MAC address = %02x:%02x:%02x:%02x:%02x:%02x\n",
	       (mac1 >> 8) & 0xff, mac1 & 0xff, (mac2 >> 24) &0xff,
	       (mac2 >> 16) & 0xff, (mac2 >> 8) & 0xff, mac2 & 0xff);

	dev->dev_addr[0] = (mac1 >> 8) & 0xff;
	dev->dev_addr[1] = mac1 & 0xff;
	dev->dev_addr[2] = (mac2 >> 24) & 0xff;
	dev->dev_addr[3] = (mac2 >> 16) & 0xff;
	dev->dev_addr[4] = (mac2 >> 8) & 0xff;
	dev->dev_addr[5] = mac2 & 0xff;


	/*
	 * Set the max DMA transfer size. Seems that for most systems
	 * the performance is better when no MAX parameter is
	 * set. However for systems enabling PCI write and invalidate,
	 * DMA writes must be set to the L1 cache line size to get
	 * optimal performance.
	 */
	tmp = READ_CMD_MEM | WRITE_CMD_MEM | MEM_READ_MULTIPLE | 0x0000;

        /* XXXKAF DISABLED: this hurts receive performance. */
#if 0
        if ( ap->pci_command & PCI_COMMAND_INVALIDATE )
        {
            switch ( L1_CACHE_BYTES )
            {
            case 16:
                tmp |= DMA_WRITE_MAX_16;
                break;
            case 32:
                tmp |= DMA_WRITE_MAX_32;
                break;
            case 64:
                tmp |= DMA_WRITE_MAX_64;
                break;
            default:
                printk(KERN_INFO "  Cache line size %i not "
                       "supported, PCI write and invalidate "
                       "disabled\n", L1_CACHE_BYTES);
                ap->pci_command &= ~PCI_COMMAND_INVALIDATE;
                pci_write_config_word(ap->pdev, PCI_COMMAND,
                                      ap->pci_command);
            }
        }
#endif

	writel(tmp, &regs->PciState);
	if ( request_irq(dev->irq, ace_interrupt, SA_SHIRQ, ap->name, dev) ) 
        {
		printk(KERN_WARNING "%s: Requested IRQ %d is busy\n",
		       dev->name, dev->irq);
		return -EAGAIN;
	}

	/*
	 * Initialize the generic info block and the command+event rings
	 * and the control blocks for the transmit and receive rings
	 * as they need to be setup once and for all.
	 *
	 * One allocation holds the ace_info followed immediately by the
	 * ace_connection_info for connection zero.
	 */
	if ( !(info = kmalloc(sizeof(struct ace_info) + 
                              sizeof(struct ace_connection_info), 
                              GFP_KERNEL | GFP_DMA)))
	{
	  /* sizeof yields a size_t; cast it to match the %d conversion. */
	  printk(KERN_INFO"  Unable to alloc ace_info/connection_info (%d)\n",
		 (int)(sizeof(struct ace_info) +
		       sizeof(struct ace_connection_info)));
          free_irq(dev->irq, dev);
          return -EAGAIN;
	}

	/*
	 * Register the device here to be able to catch allocated
	 * interrupt handlers in case the firmware doesn't come up.
	 */
	ap->next = root_dev;
	root_dev = dev;

	ap->info = info;

	memset(info, 0, sizeof(struct ace_info));

	/*
	 * Init the control block we'll use for the first USD connection
	 * (i.e. the kernel's one)
	 */
	
	con_zero = (struct ace_connection_info *)(info + 1);
	memset(con_zero, 0, sizeof(struct ace_connection_info));
	ap->conn_state[0] = con_zero;

	ace_load_firmware(dev);
	ap->fw_running = 0;

	/* Tell the NIC where the info block lives in bus address space. */
	tmp_ptr = virt_to_bus((void *)info);
#if (BITS_PER_LONG == 64)
	writel(tmp_ptr >> 32, &regs->InfoPtrHi);
#else
	writel(0, &regs->InfoPtrHi);
#endif
	writel(tmp_ptr & 0xffffffff, &regs->InfoPtrLo);

	memset(ap->evt_ring, 0, EVT_RING_ENTRIES * sizeof(struct event));


	set_aceaddr(&info->evt_ctrl.rngptr, ap->evt_ring);
	info->evt_ctrl.flags = 0;

	ap->evt_prd = 0;
	writel(0, &regs->EvtCsm);

	info->cmd_ctrl.flags = 0;
	set_aceaddr_bus(&info->cmd_ctrl.rngptr, (void *)0x100);
	info->cmd_ctrl.max_len = 0;

	/* Start with an empty, zeroed command ring. */
	for ( i = 0; i < CMD_RING_ENTRIES; i++ ) writel(0, &regs->CmdRng[i]);
	writel(0, &regs->CmdPrd);
	writel(0, &regs->CmdCsm);

	/* The TX2/RX2 rings */
	set_aceaddr(&con_zero->tx2_ring_ptr, ap->chan0.tx2_ring);
	con_zero->tx2_ring_size = TX2_RING_ENTRIES;    
	set_aceaddr(&con_zero->rx2_ring_ptr, ap->chan0.rx2_ring);
	con_zero->rx2_ring_size = RX2_RING_ENTRIES;

#define PERF_TEST
#ifdef PERF_TEST
	if ( !(perf_test_area = kmalloc(8192*2, GFP_KERNEL | GFP_DMA)))
	{
	  printk(KERN_INFO"  Unable to alloc perf test area\n");
	  /*
	   * Do not carry on: the address written below would be
	   * virt_to_bus(NULL), which is not a buffer we own.  The
	   * device is already on root_dev, so the IRQ and info block
	   * are left for cleanup_module(), as on the other late
	   * failure paths.
	   */
	  return -ENOMEM;
	}
#endif

	/*
	 * NOTE(review): if PERF_TEST is ever undefined, perf_test_area stays
	 * NULL and this still writes virt_to_bus(NULL) to the NIC -- confirm
	 * what the firmware expects in that case.
	 */
	tmp_ptr = virt_to_bus((void *)perf_test_area);
#if (BITS_PER_LONG == 64)
	writel(tmp_ptr >> 32, &regs->PerfTestPtrHi);
#else
	writel(0, &regs->PerfTestPtrHi);
#endif
	writel(tmp_ptr & 0xffffffff, &regs->PerfTestPtrLo);


	/*
	 * Connection zero has free rein to DMA from anywhere
	 */
	con_zero->range_base.addrhi = 0;
	con_zero->range_base.addrlo = 0;
	con_zero->range_length      = 0xffffffff;

	/*
	 * New USD command scheme uses the memory window.
	 */
	writel(USD_CMD_BASE, &regs->WinBase);
	memset((char *) regs->usd2_ctrl, 0,
	       sizeof(usd2_ctrl_t)*USD_CHANNELS);

	/*
	 * Potential item for tuning parameter
	 */
	writel(DMA_THRESH_8W, &regs->DmaReadCfg);
	writel(DMA_THRESH_8W, &regs->DmaWriteCfg);

	writel(0, &regs->MaskInt);
	writel(1, &regs->IfIdx);
	writel(1, &regs->AssistState);

	writel(DEF_STAT, &regs->TuneStatTicks);

	/* Program the defaults first; per-board overrides follow. */
	writel(DEF_TX_COAL, &regs->TuneTxCoalTicks);
	writel(DEF_TX_MAX_DESC, &regs->TuneMaxTxDesc);
	writel(DEF_RX_COAL, &regs->TuneRxCoalTicks);
	writel(DEF_RX_MAX_DESC, &regs->TuneMaxRxDesc);
	writel(DEF_TRACE, &regs->TuneTrace);
	writel(DEF_TX_RATIO, &regs->TxBufRat);


	/* The parameter arrays only have eight slots. */
	if (board_idx >= 8) {
		printk(KERN_WARNING "%s: more then 8 NICs detected, "
		       "ignoring module parameters!\n", dev->name);
		board_idx = -1;
	}

	if (board_idx >= 0) {

		/* A zero entry means "keep the default written above". */
		if (tx_coal_tick[board_idx])
			writel(tx_coal_tick[board_idx],
			       &regs->TuneTxCoalTicks);
		if (max_tx_desc[board_idx])
			writel(max_tx_desc[board_idx], &regs->TuneMaxTxDesc);

		if (rx_coal_tick[board_idx])
			writel(rx_coal_tick[board_idx],
			       &regs->TuneRxCoalTicks);
		if (max_rx_desc[board_idx])
			writel(max_rx_desc[board_idx], &regs->TuneMaxRxDesc);

		if (trace[board_idx])
			writel(trace[board_idx], &regs->TuneTrace);

		/*
		 * Unlike the others, tx_ratio is written for any value in
		 * 0..63, so an unset (zero) entry replaces DEF_TX_RATIO.
		 */
		if ((tx_ratio[board_idx] >= 0) && (tx_ratio[board_idx] < 64))
			writel(tx_ratio[board_idx], &regs->TxBufRat);
	}

	/*
	 * Default link parameters
	 */
	tmp = LNK_ENABLE | LNK_FULL_DUPLEX | LNK_1000MB | LNK_100MB |
		LNK_10MB | LNK_RX_FLOW_CTL_Y | LNK_NEG_FCTL | LNK_NEGOTIATE;
	if(ap->version == 2)
		tmp |= LNK_TX_FLOW_CTL_Y;

	/*
	 * Override link default parameters.  The option word is a bit mask:
	 * 0x01 half duplex, 0x02 no negotiation, 0x10/0x20/0x40 allow
	 * 10/100/1000 Mb, 0x100 disable flow-control negotiation,
	 * 0x200 RX flow control, 0x400 TX flow control.
	 *
	 * NOTE(review): tmp restarts from LNK_ENABLE alone, so the two
	 * "&= ~" lines below clear bits that are not set at that point --
	 * confirm the intended starting value.
	 */
	if ((board_idx >= 0) && link[board_idx]) {
		int option = link[board_idx];


		tmp = LNK_ENABLE;

		if (option & 0x01){
			printk(KERN_INFO "%s: Setting half duplex link\n",
			       dev->name);
			tmp &= ~LNK_FULL_DUPLEX;
		}
		if (option & 0x02)
			tmp &= ~LNK_NEGOTIATE;
		if (option & 0x10)
			tmp |= LNK_10MB;
		if (option & 0x20)
			tmp |= LNK_100MB;
		if (option & 0x40)
			tmp |= LNK_1000MB;
		if ((option & 0x70) == 0){
			printk(KERN_WARNING "%s: No media speed specified, "
			       "forcing auto negotiation\n", dev->name);
			tmp |= LNK_NEGOTIATE | LNK_1000MB |
				LNK_100MB | LNK_10MB;
		}
		if ((option & 0x100) == 0)
			tmp |= LNK_NEG_FCTL;
		else
			printk(KERN_INFO "%s: Disabling flow control "
			       "negotiation\n", dev->name);
		if (option & 0x200)
			tmp |= LNK_RX_FLOW_CTL_Y;
		if ((option & 0x400) && (ap->version == 2)){
			printk(KERN_INFO "%s: Enabling TX flow control\n",
			       dev->name);
			tmp |= LNK_TX_FLOW_CTL_Y;
		}
	}


	writel(tmp, &regs->TuneLink);
        writel(tmp, &regs->TuneFastLink);
        writel(tigon2FwStartAddr, &regs->Pc);

	writel(0, &regs->Mb[0].lo);

        /*
         * Write addresses into ace_info that the card will access later...
         */
        set_aceaddr(&info->evt_prd_ptr, &ap->evt_prd);
        set_aceaddr(&info->stats2_ptr, &info->s.stats);
        set_aceaddr(&info->connection_zero, con_zero); 


	/*
	 * Start the NIC CPU
	 */
/*DBG*/ printk(__FILE__ ": Connection 0 state at %#x\n", (u32)con_zero);
	writel(readl(&regs->CpuCtrl) & ~(CPU_HALT|CPU_TRACE), &regs->CpuCtrl);

	/*
	 * Wait for the firmware to spin up - max 3 seconds.
	 * ap->fw_running is set by ace_handle_event() on E_FW_RUNNING.
	 */
	myjif = jiffies + 3 * HZ;
	while (time_before(jiffies, myjif) && !ap->fw_running)
        {
            current->state = TASK_INTERRUPTIBLE;
            schedule_timeout(HZ/10); /* wait 100ms */
            current->state = TASK_RUNNING;
        }

	if (!ap->fw_running)
        { 
            printk(KERN_ERR "%s: Firmware NOT running!\n", dev->name);
            writel(readl(&regs->CpuCtrl) | CPU_HALT, &regs->CpuCtrl);
            return -EBUSY;
	}


/*DBG*/	printk( __FILE__ ":  PciState = %08x\n",
		readl(&regs->PciState) );
		
       // can now free the PCI performance test area 
// XXXXXXXX       if( perf_test_area ) kfree( perf_test_area );
	/*
	 * NOTE(review): with the kfree above disabled, perf_test_area is
	 * never released and its pointer is not kept anywhere.
	 */

#if 0 // this is how it *should* be done
	
        /* OK, let's really try and tell the card about connection 0 */
	acenic_new_connection (
			       dev,                   /* struct net_device *dev */ 
			       &ap->chan0,            /* void *buffer */
			       USD_ENDPOINT_MIN_SIZE, /* u32 size */
			       /* usd_setup_callback_t *callback */
			       0, /* void *argument */
			       NULL, /* usd_irq_callback_t *rx_irq_call */
			       NULL, /* usd_irq_callback_t *tx_irq_call */
			       NULL, /* usd_teardown_callback_t */
			       0, /* u8   *dest_mac_addr */
			       0, /* u32   src_ip_addr */
			       0, /* u32   dest_ip_addr */
			       0, /* u16   src_port */
			       0, /* u16   dest_port */
			       0  /* u8    ip_proto */
			       );
#endif

	/*
	 * We load the ring here as there seem to be no way to tell the
	 * firmware to wipe the ring without re-initializing it.
	 */
	if ( (i = ace_load_rx2_ring(dev)) )
	{
/*DBG*/     printk(__FILE__ ": leaving ace_init() (failure)\n");
	    return i;
	}

/*DBG*/	printk(__FILE__ ": leaving ace_init(). (success)\n");
	return 0;
}

#ifdef INCLUDE_WATCH_TIMER	
/*
 * Watch timer (debug aid, only built when INCLUDE_WATCH_TIMER is defined).
 *
 * Prints a one-line snapshot of channel 0's transmit state -- host and
 * on-card producer/consumer indices, the TX interrupt mask and, when the
 * most recently queued skb has an owning socket, that socket's
 * wmem_alloc counter -- and then re-arms itself.
 *
 * @data: the struct net_device pointer, cast to unsigned long.
 */
static void ace_timer(unsigned long data)
{
    struct net_device *dev = (struct net_device *)data;
    struct ace_private *ap = (struct ace_private *)dev->priv;

    /* Placeholder printed when no socket accounting is available. */
    u32 xxx=12345;
    struct sk_buff *skb;

    /* The slot just before the producer holds the last skb queued. */
    skb = ap->c0_tx2_skbuff[(ap->c0_tx_producer-1)&(TX2_RING_ENTRIES-1)];
    
    if( skb && ((u32)skb) != 0xdeadbeef && skb->sk)
    {
        xxx = skb->sk->wmem_alloc.counter ;
    }

    printk("[%s] Watch Thread: busy %ld, full %d, prod = %x (%x on card), oldcons = %x, newcons = %x, mask = %d, sockdata = %d, ref = %04x, irq = %04x\n",
           dev->name, dev->tbusy, ap->tx2_full,
           ap->c0_tx_producer, ap->regs->usd2_ctrl[0].tx_prod,
           ap->c0_old_tx_consumer,
           ap->regs->usd2_ctrl[0].tx_cons, 
           ap->tx_int_mask[0]&1,
           xxx,
           ap->regs->usd2_ctrl[0].tx_ref,
           ap->DEBUG_last_irq_tx_cons);
    
    /*
     * Fire again in 2.5 seconds.  Multiply before dividing: 5/2*HZ
     * truncates to 2*HZ in integer arithmetic.
     */
    ap->watch_timer.expires = jiffies + (5*HZ)/2;
    add_timer(&ap->watch_timer);
}
#endif

/*
 * Load the standard rx ring.
 */
static int ace_load_rx2_ring(struct net_device *dev)
{
	struct ace_private *ap;
	struct ace_regs *regs;
	struct ace_info *info;
	unsigned long flags;
	short i;

/*DBG*/ printk(__FILE__ ": entering " __FUNCTION__ "().\n");

	ap = (struct ace_private *)dev->priv;
	regs = ap->regs;
	info = ap->info;

	spin_lock_irqsave(&ap->lock, flags);

	//XXXX Init stuff that should probably be elsewhere! IAP

	// TX2 stuff
	ap->tx2_full = 0;  
	ap->c0_tx_producer = ap->c0_old_tx_consumer = 0;

	// RX2 stuff
	ap->c0_rx_consumer = 0;
	ap->c0_free_producer = 0;
	
	for ( i = 0; i < RX_RING_THRESH; i++ ) 
        {
		struct sk_buff *skb;
		if ( !(skb = alloc_skb(ACE_MTU + ETH_HLEN + 6, GFP_ATOMIC)) ) 
                { 
		    printk(__FILE__ ": " __FUNCTION__ "(): "
			   "alloc_skb() returned NULL.\n");
		    writel(readl(&regs->CpuCtrl) | CPU_HALT, &regs->CpuCtrl);
		    writel(readl(&regs->CpuBCtrl) | CPU_HALT, &regs->CpuBCtrl);

		    for ( --i; i >= 0; i-- )
		    {
			dev_kfree_skb(ap->c0_rx2_skbuff[i]);
			ap->c0_rx2_skbuff[i] = NULL;
		    }

		    free_irq(dev->irq, root_dev);
		    spin_unlock_irqrestore(&ap->lock, flags);
		    return(-ENOMEM);
		}

		ap->c0_rx2_skbuff[i] = skb;
		/* Make sure the data contents end up on an aligned address. */
		skb_reserve(skb, 2);

		ap->chan0.rx2_ring[i].buf       = virt_to_bus(skb->data);
		ap->chan0.rx2_ring[i].flagsize  = 
		  (ACE_MTU + ETH_HLEN + 4) | RX2_DESCR_FLAGS_EOP;


	}
	ap->c0_free_producer = i;


	/* Init RX2 chan 0 */
	ap->conn_state[0]->callback_arg = (void *) dev;
	ap->conn_state[0]->rx_irq_callback_fn = (void *) ace_rx2_int;
	ap->rx_int_mask[0] = 1; // always enable these callbacks
	ap->regs->usd2_ctrl[0].rx_ref = 1; // request callback

	acenic_usd2_set_rx2_producer_index( dev, 0, i );
       
	/* Init TX2 chan 0 */
	ap->conn_state[0]->callback_arg = (void *) dev;
	ap->conn_state[0]->tx_irq_callback_fn = (void *) ace_tx2_int;
	ap->tx_int_mask[0] = 0; // always enable these callbacks	


	/***/
	spin_unlock_irqrestore(&ap->lock, flags);

/*DBG*/ printk(__FILE__ ": leaving " __FUNCTION__ " loaded %i bufs ().\n",i);
	return 0;
}



/*
 * All events are considered to be slow (RX/TX ints do not generate
 * events) and are handled here, outside the main interrupt handler,
 * to reduce the size of the handler.
 *
 * Processes the entries of ap->evt_ring from index <evtcsm> up to, but
 * not including, <evtprd>, wrapping at EVT_RING_ENTRIES.
 *
 * Returns the new consumer index (equal to <evtprd> on return).
 *
 * For the USD channel/filter events the id is assembled from the event's
 * code and idx fields as (code << 12) | idx.
 */
static u32 ace_handle_event(struct net_device *dev, u32 evtcsm, u32 evtprd)
{
    struct ace_private *ap;

    ap = (struct ace_private *)dev->priv;

    while ( evtcsm != evtprd )
    {
        switch (ap->evt_ring[evtcsm].evt){
        case E_FW_RUNNING:
            printk(KERN_INFO "%s: Firmware up and running\n",
                   dev->name);
            /* ace_init() polls this flag while waiting for the firmware. */
            ap->fw_running = 1;
            break;
        case E_LNK_STATE:
        {
            u16 code = ap->evt_ring[evtcsm].code;
            if (code == E_C_LINK_UP){
                printk("%s: Optical link UP\n", dev->name);
            }
            else if (code == E_C_LINK_DOWN)
                printk("%s: Optical link DOWN\n",
                       dev->name);
            else
                printk("%s: Unknown optical link state %02x\n",
                       dev->name, code);
            break;
        }
        case E_ERROR:
            switch(ap->evt_ring[evtcsm].code){
            case E_C_ERR_INVAL_CMD:
                printk(KERN_ERR "%s: invalid command error\n",
                       dev->name);
                break;
            case E_C_ERR_UNIMP_CMD:
                printk(KERN_ERR "%s: unimplemented command "
                       "error\n", dev->name);
                break;
            case E_C_ERR_BAD_CFG:
                printk(KERN_ERR "%s: bad config error\n",
                       dev->name);
                break;
            case E_C_ERR_BAD_USD:
                printk(KERN_ERR 
                       "%s: F/W failed to add channel.\n", 
                       dev->name);
                /* Complete the pending add request with id 0. */
                add_connection_completion(dev, 0);
                break;
            case E_C_ERR_INVALID_FILTER:
                printk(KERN_ERR 
                       "%s: F/W failed to install filter.\n", 
                       dev->name);
                break;
            default:
                
                printk(KERN_ERR "%s: unknown error %02x\n",
                       dev->name, ap->evt_ring[evtcsm].code);
            }
            break;
        case E_USD_CTXT_DELETED:
        {
            /* The card has deleted a connection.  How nice */
            int id = ((ap->evt_ring[evtcsm].code << 12) 
                      | ap->evt_ring[evtcsm].idx);
            printk("%s: F/W deleted channel %i\n", dev->name, id);
            /*
             * NOTE(review): id comes straight from the card and is not
             * range-checked against the size of conn_state[].
             */
            if (ap->conn_state[id] != NULL)
            {
                /* A connection need not have a teardown hook installed. */
                if (ap->conn_state[id]->teardown_callback_fn != NULL)
                {
                    (*ap->conn_state[id]->teardown_callback_fn)
                        (ap->conn_state[id]->callback_arg);
                }
                /*
                 * Connection zero's state is carved out of the ace_info
                 * allocation by ace_init() and is released along with
                 * ap->info, so it must not be passed to kfree() itself.
                 */
                if (id != 0)
                {
                    kfree(ap->conn_state[id]);
                }
                ap->conn_state[id] = NULL;
            }
            break;
        }
        case E_USD_CTXT_ADDED:
        {
            int id = ((ap->evt_ring[evtcsm].code << 12) 
                      | ap->evt_ring[evtcsm].idx);
            /* Hurrah! Our request has been processed. */
            printk("%s: F/W added channel %i\n",
                   dev->name, id);
            add_connection_completion(dev, id);
            break;
        }
        case E_USD_FILTER_INSTALLED:
        {
            int id = ((ap->evt_ring[evtcsm].code << 12) 
                      | ap->evt_ring[evtcsm].idx);
            /* Hurrah! Our request has been processed. */
            printk("%s: F/W installed new filter %i\n",
                   dev->name, id);
            ap->filter_lock = 0;
            if ( ap->filter_id ) del_connection_completion(dev, ap->filter_id);
            break;
        }
        default:
            printk(KERN_ERR "%s: Unhandled event 0x%02x\n",
                   dev->name, ap->evt_ring[evtcsm].evt);
        }
        evtcsm = (evtcsm + 1) % EVT_RING_ENTRIES;
    }
    
    return evtcsm;
}


/*
 * Service the channel-0 RX2 ring.
 *
 * Walks the ring from the driver's consumer index.  For every descriptor
 * the NIC has marked DONE:
 *   - if the NIC also flagged it OK and a replacement skb can be allocated,
 *     the filled skb is handed to the stack and the new skb is posted at
 *     the free-producer slot;
 *   - otherwise the old skb is recycled straight back onto the free list.
 * When the next descriptor is not DONE, the updated indices are written to
 * the card, the mailbox is kicked, bit 0 of rx_int_mask[0] is set again and
 * the function returns.
 */
static void ace_rx2_int(struct net_device *dev)
{
  struct ace_private *ap = (struct ace_private *)dev->priv;

  for ( ; ; )
  {
      struct sk_buff * skb, *newskb = NULL;
      int rxc = ap->c0_rx_consumer % RX2_RING_ENTRIES;
      int frp = ap->c0_free_producer % RX2_RING_ENTRIES;
      rx2_desc_t *rdesc = &ap->chan0.rx2_ring[rxc];
      rx2_desc_t *fdesc = &ap->chan0.rx2_ring[frp];
      
      if( rdesc->flagsize & RX2_DESCR_FLAGS_DONE )
      {
	
#if 0
	  { // NIC debugging stuff
	    static u8 seq;
	    
	    u8 c = ((u8*)(phys_to_virt(rdesc->buf)))[6];
	    
	    if( c != seq )
	      {
		printk("SEQFUK %02x %02x\n",c,seq);
	      }	  
	    seq = ++c;
	    if(seq>253)seq=0;
	    ((u8*)(phys_to_virt(rdesc->buf)))[6] = 0;
	  }
#endif


	  /*
	   * The allocation is only attempted when the NIC flagged the buffer
	   * OK, so newskb stays NULL on the "duff buffer" path.
	   */
	  if( (rdesc->flagsize & RX2_DESCR_FLAGS_OK) &&
              (newskb = alloc_skb(dev->mtu + ETH_HLEN + 6, GFP_ATOMIC)) )
          {
              /* A valid packet... */
              int size = rdesc->flagsize & 0xffff;
              unsigned int rx_len;
              
              /* First pass up old skb for processing. */
              skb = ap->c0_rx2_skbuff[ rxc ];	
              ap->c0_rx2_skbuff[ rxc ] = 0;
              skb_put(skb, size);
              skb->dev = dev;
              skb->protocol = eth_type_trans(skb, dev);
              skb->ip_summed = CHECKSUM_UNNECESSARY/*NONE*/;

              /*
               * Record the length BEFORE netif_rx(): once the skb has been
               * handed to the stack we no longer own it and it may already
               * have been freed, so it must not be touched afterwards.
               */
              rx_len = skb->len;
              netif_rx(skb);		/* send it up */

              ap->c0_rx_consumer++;

              ap->stats.rx_packets++;
              ap->stats.rx_bytes += rx_len;

              /* Now we prepare a descriptor for the new skb. */
              skb_reserve(newskb, 2);
              ap->c0_rx2_skbuff[frp] = newskb;
              fdesc->buf      = virt_to_bus(newskb->data);
              fdesc->flagsize = 
                  (dev->mtu + ETH_HLEN + 4) | RX2_DESCR_FLAGS_EOP;
              
              ap->c0_free_producer++;
          }
          else
          {
              /*
               * Urk! Either we couldn't alloc a new buffer, or NIC thinks
               * our buffer was duff.  Recycle the old skb onto the free
               * list instead of passing it up.
               */
              printk(KERN_ERR "%s: Out of memory, or duff buffer : %08x %p\n",
                     dev->name, rdesc->flagsize, newskb);
              
              skb = ap->c0_rx2_skbuff[ rxc ];	
              ap->c0_rx2_skbuff[ rxc ] = 0;   // so we don't try to dealloc!
              ap->c0_rx_consumer++;
          
              ap->c0_rx2_skbuff[frp] = skb;
              
              fdesc->buf      = virt_to_bus(skb->data);
              fdesc->flagsize = (dev->mtu + ETH_HLEN + 4) | 
                  RX2_DESCR_FLAGS_EOP;
              
              ap->c0_free_producer++;
          }
      }
      else
      {
          /*
           * All filled bufs have been passed up, so now we update card regs
           * to tell it about the new empty buffers.
           */
          mbox_t *mb;
          ap->regs->usd2_ctrl[0].free_prod = ap->c0_free_producer;	    
          ap->regs->usd2_ctrl[0].rx_ref = ap->c0_rx_consumer+14; /*XXX14*/

          /*
           * Kick the mailbox 16 entries below UsdMb[USD_MBOX_BASE].
           * NOTE(review): presumably the RX mailbox for channel 0 --
           * confirm against the firmware mailbox layout.
           */
          mb = &ap->regs->UsdMb[USD_MBOX_BASE];
          mb -= 16;
          mb->lo = 0; 

          /* Re-arm RX interrupt delivery for channel 0. */
          set_bit( 0, & ap->rx_int_mask[0] );
	  return ; /* ALL DONE! */
      }
  }
}


static void ace_interrupt(int irq, void *dev_id, struct pt_regs *ptregs)
{
    struct net_device *dev = (struct net_device *)dev_id;
    struct ace_private *ap = (struct ace_private *)dev->priv;
    struct ace_regs *regs  = ap->regs;
    u32 evtcsm, evtprd;
    int i;	
        
    if ( test_and_set_bit(1, (volatile void *)&(ap->in_interrupt)) )
    {
        printk(KERN_ERR __FILE__ ": re-entered irq handler\n");
        return;
    }

    spin_lock(&ap->lock);

    /* Clean the interrupt */
    writel(0, &regs->Mb[0].lo);

    for ( i = 0; i < (USD_CHANNELS/32); i++ )
    {
        int e;

        /***** RX2 interrupt handler *******************/
        u32 m = ap->regs->rx_interrupts[i] & ap->rx_int_mask[i];
        while( (e = ffs(m)) )
        {
            int chan = --e + (32 * i);
                
            m &= ~(1<<e); 
                
            /* Clear bit in int mask until we're reprimed. */
            clear_bit(e, &ap->rx_int_mask[i]);
                
            /* Call back. */
            (*(ap->conn_state[chan]->rx_irq_callback_fn))
                (ap->conn_state[chan]->callback_arg);	      
        }
    
        /***** TX2 interrupt handler *******************/
        m = ap->regs->tx_interrupts[i] & ap->tx_int_mask[i];
        while( (e = ffs(m)) )
        {
            int chan = --e + (32 * i);

            m &= ~(1<<e); 

            /* Clear bit in int mask until we're reprimed. */
            clear_bit( e, &ap->tx_int_mask[i] );

            /* Call back. */
            (*(ap->conn_state[chan]->tx_irq_callback_fn))
		(ap->conn_state[chan]->callback_arg);	      
        }
    }

    /***** Event mechanism ************************/
    if( ap->regs->Interrupt )
    {
        ap->regs->Interrupt = 0;

        evtcsm = readl(&regs->EvtCsm);
        evtprd = ap->evt_prd;
        if ( evtcsm != evtprd ) 
        {
            evtcsm = ace_handle_event(dev, evtcsm, evtprd);
            writel(evtcsm, &regs->EvtCsm);
        }
    }
        
    spin_unlock(&ap->lock);
    ap->in_interrupt = 0;
}


/*
 * Bring the interface up.
 *
 * Fails with -EBUSY if the firmware is not running.  Otherwise, under
 * ap->lock: programs the interface MTU, tells the firmware the host stack
 * is up, resets the cached promiscuous/all-multicast state, marks the
 * device started and takes a module reference.
 *
 * Returns 0 on success or -EBUSY.
 */
static int ace_open(struct net_device *dev)
{
	struct ace_private *ap;
	struct ace_regs *regs;
	struct cmd cmd;
	/* spin_lock_irqsave() requires an unsigned long for the saved flags. */
	unsigned long flags;

	ap = dev->priv;
	regs = ap->regs;

	if ( !(ap->fw_running) )
        {
            printk(KERN_WARNING "%s: firmware not running!\n", dev->name);
            return -EBUSY;
	}

        spin_lock_irqsave(&ap->lock, flags);

	writel(dev->mtu + ETH_HLEN + 4, &regs->IfMtu);

	cmd.evt = C_HOST_STATE;
	cmd.code = C_C_STACK_UP;
	cmd.idx = 0;
	ace_issue_cmd(regs, &cmd);

        ap->promisc = 0;
#if 0  /* XXXKAF REMOVED */
	if ( dev->flags & IFF_PROMISC )
        {
            cmd.evt = C_SET_PROMISC_MODE;
            cmd.code = C_C_PROMISC_ENABLE;
            cmd.idx = 0;
            ace_issue_cmd(regs, &cmd);
            ap->promisc = 1;
	}
#endif

	ap->mcast_all = 0;

	dev->tbusy     = 0;
	dev->interrupt = 0;
	dev->start     = 1;

	MOD_INC_USE_COUNT;

#ifdef INCLUDE_WATCH_TIMER
	/*
	 * Setup and kick off the watch timer.  Multiply before dividing:
	 * (5/2*HZ) truncates 5/2 to 2 and yields 2 seconds, not 2.5.
	 */
	init_timer(&ap->watch_timer);
	ap->watch_timer.data     = (unsigned long)dev;
	ap->watch_timer.function = ace_timer;
	ap->watch_timer.expires  = jiffies + ((5*HZ)/2);
	add_timer(&ap->watch_timer);
#endif

        spin_unlock_irqrestore(&ap->lock, flags);
	return 0;
}


/*
 * Take the interface down.
 *
 * Under ap->lock: marks the device stopped and busy, stops the optional
 * watch timer, tells the firmware the host stack is down and drops the
 * module reference.  Always returns 0.
 */
static int ace_close(struct net_device *dev)
{
	struct ace_private *ap = (struct ace_private *)dev->priv;
	struct ace_regs *regs;
	struct cmd cmd;
	/* spin_lock_irqsave() requires an unsigned long for the saved flags. */
	unsigned long flags;

        spin_lock_irqsave(&ap->lock, flags);

	dev->start = 0;
	set_bit(0, (void*)&dev->tbusy);

	regs = ap->regs;

#ifdef INCLUDE_WATCH_TIMER
	del_timer(&ap->watch_timer);
#endif

#if 0 /* XXXKAF REMOVED */
	if ( ap->promisc )
        {
		cmd.evt = C_SET_PROMISC_MODE;
		cmd.code = C_C_PROMISC_DISABLE;
		cmd.idx = 0;
		ace_issue_cmd(regs, &cmd);
		ap->promisc = 0;
	}
#endif

	cmd.evt = C_HOST_STATE;
	cmd.code = C_C_STACK_DOWN;
	cmd.idx = 0;
	ace_issue_cmd(regs, &cmd);

	MOD_DEC_USE_COUNT;
        spin_unlock_irqrestore(&ap->lock, flags);
	return 0;
}


/*
 * Channel-0 TX2 interrupt callback.
 *
 * Reaps completed transmit skbs, un-throttles the queue if the ring was
 * marked full and now has room, and -- when at least 30000/mtu packets are
 * still outstanding -- re-arms the TX interrupt so we get called again.
 */
static void ace_tx2_int(struct net_device *dev)
{
    struct ace_private *ap = (struct ace_private *)dev->priv;
    u16 rearm_threshold    = 30000 / dev->mtu;
    u16 outstanding;

    /* Debug aid: remember the card's consumer index at interrupt time. */
    ap->DEBUG_last_irq_tx_cons = ap->regs->usd2_ctrl[0].tx_cons; // XXX

    /* Free finished skbs first, then see how much is still in flight. */
    harvest_tx2(dev);
    outstanding = ap->c0_tx_producer - ap->c0_old_tx_consumer;

    if ( outstanding < TX2_RING_ENTRIES )
    {
        if ( ap->tx2_full && dev->tbusy )
        {
            /* Room has appeared: clear the throttle and kick the BH. */
            ap->tx2_full = 0;
            dev->tbusy   = 0;
            mark_bh(NET_BH);
        }
    }

    /* Little enough outstanding: no further interrupt needed. */
    if ( outstanding < rearm_threshold )
        return;

    /* Interrupt already armed by someone else: nothing more to do. */
    if ( test_and_set_bit(0, &ap->tx_int_mask[0]) )
        return;

    /* Ask for a callback once the card has consumed another batch. */
    ap->regs->usd2_ctrl[0].tx_ref = ap->c0_old_tx_consumer + rearm_threshold;
    ap->regs->UsdMb[USD_MBOX_BASE].lo = 0; 
}


static void harvest_tx2(struct net_device *dev)
{
    struct ace_private *ap = (struct ace_private *)dev->priv;
    u32 new_cons           = ap->regs->usd2_ctrl[0].tx_cons;

    if( test_and_set_bit(0, (void*)&ap->harvest_lock) != 0 )
    {
        printk("%s: Harvest busy.\n",dev->name);
        return;
    }

    while( ap->c0_old_tx_consumer != new_cons )
    {
        u32 idx = ap->c0_old_tx_consumer % TX2_RING_ENTRIES;
      
        ap->stats.tx_packets++;
        ap->stats.tx_bytes += ap->c0_tx2_skbuff[idx]->len;

        ap->chan0.tx2_ring[idx].buf = 0xdead0001;
        ap->chan0.tx2_ring[idx].flagsize = 0;

        dev_kfree_skb(ap->c0_tx2_skbuff[idx]);
        ap->c0_tx2_skbuff[idx] = NULL;

        ap->c0_old_tx_consumer++; // u16 variable
    }

    ap->harvest_lock = 0;
}


/*
 * Queue one skb for transmission on the channel-0 TX2 ring.
 *
 * Returns non-zero when the skb was NOT queued (device already busy, or the
 * ring was full before this skb could be added) and 0 when it was queued.
 * When the ring is, or becomes, full the queue is throttled via
 * ap->tx2_full / dev->tbusy and a TX interrupt is requested so that
 * ace_tx2_int() can un-throttle it later.
 */
static int ace_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct ace_private *ap = (struct ace_private *)dev->priv;
	unsigned long flags;
	unsigned long addr; 
	u32 pidx;

        /* Claim the transmitter; bail out if somebody else already has it. */
        if ( test_and_set_bit(0, (void*)&dev->tbusy) != 0 ) 
        {
	    printk("%s: Am busy. Bug out.\n",dev->name);
	    return(1);
        }

	spin_lock_irqsave(&ap->lock, flags);

	/* Free up any descriptors the card has already finished with. */
	harvest_tx2(dev);	    	    

	pidx = ap->c0_tx_producer % TX2_RING_ENTRIES;

	/*
	 * Producer and consumer land on the same slot without being equal:
	 * the ring is completely full, so this skb cannot be queued.
	 */
	if( (ap->c0_tx_producer != ap->c0_old_tx_consumer) &&
	    pidx == (ap->c0_old_tx_consumer % TX2_RING_ENTRIES) ) 
        {
            goto full_tx;
        }

	/* Fill in the descriptor for this skb and advance the producer. */
	ap->c0_tx2_skbuff[pidx] = skb;
	addr = virt_to_bus(skb->data);
	ap->chan0.tx2_ring[pidx].buf      = addr;
	ap->chan0.tx2_ring[pidx].flagsize = skb->len | TX2_DESC_END;

	ap->c0_tx_producer++;

#ifdef ACENIC_DEBUGGING_IOCTLS    
	/* Make sure the command path hasn't been trampled by the debugging
	 * ioctls.  Important to build with the *same* copy of acenic.h as 
	 * the acenic driver... */
	writel(USD_CMD_BASE, &ap->regs->WinBase);
#endif

	/* Put the PROD into the relevant slot and kick the mailbox */
	ap->regs->usd2_ctrl[0].tx_prod = ap->c0_tx_producer; // U16 write
	ap->regs->UsdMb[USD_MBOX_BASE].lo = 0;

        /*
         * No need to test producer and consumer for equality here -- the 
         * ring can't be empty as we just added something! 
         */
	if ( ((ap->c0_tx_producer - ap->c0_old_tx_consumer) & 
              (TX2_RING_ENTRIES - 1)) == 0 )
        {
        /*
         * Reached either by falling in (the skb just queued filled the
         * ring) or via the goto above (ring was already full, skb not
         * queued).
         */
        full_tx:
//printk("FULL TX\n");
	    /* Request a TX interrupt unless one is already armed. */
	    if ( !test_and_set_bit(0, &ap->tx_int_mask[0]) )
            {
		ap->regs->usd2_ctrl[0].tx_ref = ap->c0_tx_producer - 32;
		ap->regs->UsdMb[USD_MBOX_BASE].lo = 0; 
            }

	    /* Throttle the queue until ace_tx2_int() sees free space. */
	    ap->tx2_full = 1;
	    set_bit(0, (void*)&dev->tbusy);

	    spin_unlock_irqrestore(&ap->lock, flags);    
	    dev->trans_start = jiffies;
	    /*
	     * pidx still equals the producer slot only if the producer was
	     * not advanced, i.e. the skb was not queued: return 1 then,
	     * 0 otherwise.
	     */
	    return(pidx == (ap->c0_tx_producer % TX2_RING_ENTRIES));
        }

#if 0
	if( (ap->tx_int_mask[0] & 1) == 0 )
        {
	    u16 delta = ap->c0_tx_producer - ap->c0_old_tx_consumer;
            u16 dmax  = 30000 / dev->mtu;
       
	    if( (delta >= dmax) && !test_and_set_bit(0, &ap->tx_int_mask[0]) )
            {
		ap->regs->usd2_ctrl[0].tx_ref = ap->c0_old_tx_consumer + dmax;
		ap->regs->UsdMb[USD_MBOX_BASE].lo = 0;
            }
        }
#endif

	spin_unlock_irqrestore(&ap->lock, flags);

        /* Queued with room to spare: release the transmitter. */
        dev->tbusy       = 0;
	dev->trans_start = jiffies;
	return(0);
}


/*
 * Change the interface MTU.
 *
 * Rejects values outside [68, ACE_MTU] with -EINVAL.  Otherwise, under
 * ap->lock, programs the card with the new frame size (MTU + Ethernet
 * header + 4) and records the MTU in the net_device.  Returns 0 on success.
 */
static int ace_change_mtu(struct net_device *dev, int new_mtu)
{
	struct ace_private *ap = dev->priv;
	struct ace_regs *regs = ap->regs;
	/* spin_lock_irqsave() requires an unsigned long for the saved flags. */
	unsigned long flags;

/*DBG*/ printk(__FILE__ ": entering " __FUNCTION__ "().\n");

	if ( (new_mtu < 68) || (new_mtu > ACE_MTU) ) return -EINVAL;
	
        spin_lock_irqsave(&ap->lock, flags);
	writel(new_mtu + ETH_HLEN + 4, &regs->IfMtu);
	dev->mtu = new_mtu;
        spin_unlock_irqrestore(&ap->lock, flags);

/*DBG*/ printk(__FILE__ ": leaving " __FUNCTION__ "().\n");
	return 0;
}


/*
 * Set the hardware MAC address.
 */
static int ace_set_mac_addr(struct net_device *dev, void *p)
{
	struct ace_private *ap = dev->priv;
	struct sockaddr *addr=p;
	struct ace_regs *regs;
	u16 *da;
	struct cmd cmd;
        unsigned int flags;

/*DBG*/ printk(__FILE__ ": entering " __FUNCTION__ "().\n");

	if(dev->start)
		return -EBUSY;
        spin_lock_irqsave(&ap->lock, flags);
	memcpy(dev->dev_addr, addr->sa_data,dev->addr_len);

	da = (u16 *)dev->dev_addr;

	regs = ((struct ace_private *)dev->priv)->regs;
	writel(da[0], &regs->MacAddrHi);
	writel((da[1] << 16) | da[2], &regs->MacAddrLo);

	cmd.evt = C_SET_MAC_ADDR;
	cmd.code = 0;
	cmd.idx = 0;
	ace_issue_cmd(regs, &cmd);

        spin_unlock_irqrestore(&ap->lock, flags);
/*DBG*/ printk(__FILE__ ": leaving " __FUNCTION__ "().\n");
	return 0;
}


/*
 * Multicast / promiscuous mode hook.
 *
 * Currently a no-op: the whole body is compiled out with "#if 0", so
 * changes to dev->flags or the multicast list are never pushed to the
 * firmware from here.  The disabled code is kept for reference only.
 */
static void ace_set_multicast_list(struct net_device *dev)
{
#if 0
    XXXXXXXXXXXXX KAF REMOVED
	struct ace_private *ap = dev->priv;
	struct ace_regs *regs = ap->regs;
	struct cmd cmd;

	if ((dev->flags & IFF_ALLMULTI) && !(ap->mcast_all)) {
		cmd.evt = C_SET_MULTICAST_MODE;
		cmd.code = C_C_MCAST_ENABLE;
		cmd.idx = 0;
		ace_issue_cmd(regs, &cmd);
		ap->mcast_all = 1;
	} else if (ap->mcast_all){
		cmd.evt = C_SET_MULTICAST_MODE;
		cmd.code = C_C_MCAST_ENABLE;
		cmd.idx = 0;
		ace_issue_cmd(regs, &cmd);
		ap->mcast_all = 0;
	}

	if ((dev->flags & IFF_PROMISC) && !(ap->promisc)) {
		cmd.evt = C_SET_PROMISC_MODE;
		cmd.code = C_C_PROMISC_ENABLE;
		cmd.idx = 0;
		ace_issue_cmd(regs, &cmd);
		ap->promisc = 1;
	}else if (!(dev->flags & IFF_PROMISC) && (ap->promisc)){
		cmd.evt = C_SET_PROMISC_MODE;
		cmd.code = C_C_PROMISC_DISABLE;
		cmd.idx = 0;
		ace_issue_cmd(regs, &cmd);
		ap->promisc = 0;
	}

	/*
	 * For the time being multicast relies on the upper layers
	 * filtering it properly. The Firmware does not allow one to
	 * set the entire multicast list at a time and keeping track of
	 * it here is going to be messy.
	 */
	if ((dev->mc_count) && !(ap->mcast_all)) {
		cmd.evt = C_SET_MULTICAST_MODE;
		cmd.code = C_C_MCAST_ENABLE;
		cmd.idx = 0;
		ace_issue_cmd(regs, &cmd);
	}else if (!ap->mcast_all) {
		cmd.evt = C_SET_MULTICAST_MODE;
		cmd.code = C_C_MCAST_DISABLE;
		cmd.idx = 0;
		ace_issue_cmd(regs, &cmd);
	}
#endif
}


static struct net_device_stats *ace_get_stats(struct net_device *dev)
{
	struct ace_private *ap = dev->priv;

	ap->stats.rx_errors  = ap->info->s.nic_stats.ifInErrors;
	ap->stats.rx_dropped = ap->info->s.nic_stats.ifInDiscards;

	ap->stats.tx_errors  = ap->info->s.nic_stats.ifOutErrors;
	ap->stats.tx_dropped = ap->info->s.nic_stats.ifOutDiscards;
	
	ap->stats.multicast  = ap->info->s.nic_stats.ifHCInMulticastPkts;
	ap->stats.collisions = 
	    ap->info->s.nic_stats.dot3StatsMultipleCollisionFrames + 
	    ap->info->s.nic_stats.dot3StatsSingleCollisionFrames;
	
	return (&ap->stats);
}


/*
 * Copy `size' bytes from host memory at `src' to NIC address `dest'
 * through the register window.
 *
 * The copy is done in chunks that never cross a window boundary: for each
 * chunk WinBase is pointed at the window containing `dest' and the data is
 * written one 32-bit word at a time.  Only whole words of each chunk are
 * written.
 */
void __init ace_copy(struct ace_regs *regs, void *src, u32 dest, int size)
{
    short chunk;

    for ( ; size > 0; dest += chunk, src += chunk, size -= chunk )
    {
        unsigned long port;
        u32 *word;
        int remaining;

        /* Bytes up to the end of this window, capped by what is left. */
        chunk = min(((~dest & (ACE_WINDOW_SIZE - 1)) + 1),
                    min(size, ACE_WINDOW_SIZE));

        /* Host-visible address of `dest' inside the window. */
        port = (unsigned long)&regs->Window +
            (dest & (ACE_WINDOW_SIZE - 1));

        /* Slide the window over the target region. */
        writel(dest & ~(ACE_WINDOW_SIZE - 1), &regs->WinBase);

#ifdef __BIG_ENDIAN
#error "data must be swapped here"
#else
        word = src;
        for ( remaining = chunk / 4; remaining > 0; remaining-- )
        {
            writel(*word, port);
            word++;
            port += 4;
        }
#endif
    }
}


/*
 * Zero `size' bytes of NIC memory starting at NIC address `dest'.
 *
 * Works through the register window in chunks that never cross a window
 * boundary, writing zero one 32-bit word at a time.
 */
void __init ace_clear(struct ace_regs *regs, u32 dest, int size)
{
    short chunk = 0;

    for ( ; size > 0; dest += chunk, size -= chunk )
    {
        unsigned long port;
        int remaining;

        /* Bytes up to the end of this window, capped by what is left. */
        chunk = min(((~dest & (ACE_WINDOW_SIZE - 1)) + 1),
                    min(size, ACE_WINDOW_SIZE));

        /* Host-visible address of `dest' inside the window. */
        port = (unsigned long)&regs->Window +
            (dest & (ACE_WINDOW_SIZE - 1));

        /* Slide the window over the target region. */
        writel(dest & ~(ACE_WINDOW_SIZE - 1), &regs->WinBase);

        for ( remaining = chunk / 4; remaining > 0; remaining-- )
        {
            writel(0, port);
            port += 4;
        }
    }
}


/*
 * Download the firmware image into the NIC's SRAM.
 *
 * The NIC CPU must be halted; if it is still running the download is
 * refused with -EFAULT.  The original author notes that this is performed
 * with interrupts disabled and with the spinlock held.
 *
 * Returns 0 on success.
 */
int __init ace_load_firmware(struct net_device *dev)
{
	struct ace_private *priv = (struct ace_private *)dev->priv;
	struct ace_regs *regs;
	u32 cpu_state;

/*DBG*/ printk(__FILE__ ": entering " __FUNCTION__ "().\n");

	regs = priv->regs;

	cpu_state = readl(&regs->CpuCtrl);
	if ((cpu_state & CPU_HALTED) == 0) {
		printk(KERN_ERR "%s: trying to download firmware while the "
		       "CPU is running!\n", dev->name);
		return -EFAULT;
	}

	/*
	 * Wipe SRAM first.  Never clear beyond 512KB: cards fitted with
	 * only 512KB of SRAM misbehave if we do.
	 */
	ace_clear(regs, 0x2000, 0x80000-0x2000);

	/* Zero the firmware's uninitialised-data sections. */
	ace_clear(regs, tigon2FwBssAddr, tigon2FwBssLen);
	ace_clear(regs, tigon2FwSbssAddr, tigon2FwSbssLen);

	/* Upload code, read-only data and initialised data, in that order. */
	ace_copy(regs, tigon2FwText, tigon2FwTextAddr,tigon2FwTextLen);
	ace_copy(regs, tigon2FwRodata, tigon2FwRodataAddr, tigon2FwRodataLen);
	ace_copy(regs, tigon2FwData, tigon2FwDataAddr,tigon2FwDataLen);

/*DBG*/ printk(__FILE__ ": leaving " __FUNCTION__ "().\n");
	return 0;
}


/*
 * The eeprom on the AceNIC is an Atmel i2c EEPROM.
 *
 * Accessing the EEPROM is `interesting' to say the least - don't read
 * this code right after dinner.
 *
 * This is all about black magic and bit-banging the device .... I
 * wonder in what hospital they have put the guy who designed the i2c
 * specs.
 *
 * Oh yes, this is only the beginning!
 */
/*
 * Begin an EEPROM transaction by bit-banging the LocalCtrl register.
 *
 * Sequence, with ~1us between steps: drive DATA high with the output
 * enabled, raise CLK, drop DATA while CLK is high, then drop CLK.
 * NOTE(review): this looks like an I2C START condition -- confirm against
 * the EEPROM datasheet.
 */
static void eeprom_start(struct ace_regs *regs)
{
	u32 local = readl(&regs->LocalCtrl);

	udelay(1);
	/* Enable the data output driver and drive DATA high. */
	local |= EEPROM_DATA_OUT | EEPROM_WRITE_ENABLE;
	writel(local, &regs->LocalCtrl);
	mb();
	udelay(1);
	/* CLK high. */
	local |= EEPROM_CLK_OUT;
	writel(local, &regs->LocalCtrl);
	mb();
	udelay(1);
	/* DATA falls while CLK is still high. */
	local &= ~EEPROM_DATA_OUT;
	writel(local, &regs->LocalCtrl);
	mb();
	udelay(1);
	/* CLK low again. */
	local &= ~EEPROM_CLK_OUT;
	writel(local, &regs->LocalCtrl);
	mb();
}


/*
 * Clock one byte out to the EEPROM, most significant bit first.
 *
 * For each of the eight bits: present the bit on DATA, pulse CLK high and
 * then drop both CLK and DATA.  The data output driver is enabled for the
 * whole transfer.
 */
static void eeprom_prep(struct ace_regs *regs, u8 magic)
{
	u8 bit;
	u32 ctrl;

	udelay(2);

	/* Start with DATA low and the output driver enabled. */
	ctrl = (readl(&regs->LocalCtrl) & ~EEPROM_DATA_OUT) |
		EEPROM_WRITE_ENABLE;
	writel(ctrl, &regs->LocalCtrl);
	mb();

	for (bit = 0x80; bit != 0; bit >>= 1) {
		udelay(2);

		/* Present the current bit on the data line. */
		ctrl = (magic & bit) ? (ctrl | EEPROM_DATA_OUT)
				     : (ctrl & ~EEPROM_DATA_OUT);
		writel(ctrl, &regs->LocalCtrl);
		mb();

		/* Clock it in: CLK high ... */
		udelay(1);
		ctrl |= EEPROM_CLK_OUT;
		writel(ctrl, &regs->LocalCtrl);
		mb();

		/* ... then CLK and DATA both low. */
		udelay(1);
		ctrl &= ~(EEPROM_CLK_OUT | EEPROM_DATA_OUT);
		writel(ctrl, &regs->LocalCtrl);
		mb();
	}
}


/*
 * Clock in the EEPROM's acknowledge bit.
 *
 * Disables the data output driver, raises CLK, samples EEPROM_DATA_IN in
 * the middle of the high phase and drops CLK again.
 *
 * Returns 1 if DATA_IN was set when sampled, 0 otherwise.  Callers in this
 * file treat a non-zero return as failure.
 */
static int eeprom_check_ack(struct ace_regs *regs)
{
	int state;
	u32 local;

	/* Stop driving the data line so it can be read back. */
	local = readl(&regs->LocalCtrl);
	local &= ~EEPROM_WRITE_ENABLE;
	writel(local, &regs->LocalCtrl);
	mb();
	udelay(2);
	/* CLK high. */
	local |= EEPROM_CLK_OUT;
	writel(local, &regs->LocalCtrl);
	mb();
	udelay(1);
	/* sample data in middle of high clk */
	state = (readl(&regs->LocalCtrl) & EEPROM_DATA_IN) != 0;
	udelay(1);
	mb();
	/* CLK low; re-read the register rather than reuse the stale copy. */
	writel(readl(&regs->LocalCtrl) & ~EEPROM_CLK_OUT, &regs->LocalCtrl);
	mb();

	return state;
}


/*
 * End an EEPROM transaction by bit-banging the LocalCtrl register.
 *
 * Sequence: enable the data output driver, drive DATA low, raise CLK,
 * raise DATA while CLK is high, then drop CLK.
 * NOTE(review): this looks like an I2C STOP condition -- confirm against
 * the EEPROM datasheet.
 */
static void eeprom_stop(struct ace_regs *regs)
{
	u32 local;

	/* Take control of the data line. */
	local = readl(&regs->LocalCtrl);
	local |= EEPROM_WRITE_ENABLE;
	writel(local, &regs->LocalCtrl);
	mb();
	udelay(1);
	/* DATA low. */
	local &= ~EEPROM_DATA_OUT;
	writel(local, &regs->LocalCtrl);
	mb();
	udelay(1);
	/* CLK high. */
	local |= EEPROM_CLK_OUT;
	writel(local, &regs->LocalCtrl);
	mb();
	udelay(1);
	/* DATA rises while CLK is still high. */
	local |= EEPROM_DATA_OUT;
	writel(local, &regs->LocalCtrl);
	mb();
	udelay(2);
	/* CLK low again. */
	local &= ~EEPROM_CLK_OUT;
	writel(local, &regs->LocalCtrl);
	mb();
}


/*
 * Read a whole byte from the EEPROM.
 */
/*
 * Read a single byte from the EEPROM at the given 16-bit offset.
 *
 * Sends the write-select byte and the two offset bytes (high, then low),
 * restarts, sends the read-select byte, clocks in eight data bits MSB
 * first and finishes with eeprom_stop().
 *
 * Returns the byte read.  NOTE: every failure (NULL regs, or a non-zero
 * eeprom_check_ack() at any stage) also returns 0, so a caller cannot tell
 * an error from a genuine zero byte; the failure paths also return without
 * calling eeprom_stop().
 */
static u8 read_eeprom_byte(struct ace_regs *regs, unsigned long offset)
{
	u32 local;
	short i;
	u8 result = 0;

	if (!regs){
		printk(KERN_ERR "No regs!\n");
		return 0;
	}

	eeprom_start(regs);

	/* Address phase: device select (write), then offset high/low bytes. */
	eeprom_prep(regs, EEPROM_WRITE_SELECT);
	if (eeprom_check_ack(regs)){
		printk("Unable to sync eeprom\n");
		return 0;
	}

	eeprom_prep(regs, (offset >> 8) & 0xff);
	if (eeprom_check_ack(regs))
		return 0;

	eeprom_prep(regs, offset & 0xff);
	if (eeprom_check_ack(regs))
		return 0;

	/* Restart and switch the device to read mode. */
	eeprom_start(regs);
	eeprom_prep(regs, EEPROM_READ_SELECT);
	if (eeprom_check_ack(regs))
		return 0;

	/* Data phase: clock in eight bits, most significant first. */
	for (i = 0; i < 8; i++) {
		/* Stop driving DATA so the EEPROM can. */
		local = readl(&regs->LocalCtrl);
		local &= ~EEPROM_WRITE_ENABLE;
		writel(local, &regs->LocalCtrl);
		udelay(2);
		mb();
		local |= EEPROM_CLK_OUT;
		writel(local, &regs->LocalCtrl);
		udelay(1);
		mb();
		/* sample data mid high clk */
		result = (result << 1) |
			((readl(&regs->LocalCtrl) & EEPROM_DATA_IN) != 0);
		udelay(1);
		mb();
		local = readl(&regs->LocalCtrl);
		local &= ~EEPROM_CLK_OUT;
		writel(local, &regs->LocalCtrl);
		mb();
		/* After the last bit, take the data line back. */
		if (i == 7){
			local |= EEPROM_WRITE_ENABLE;
			writel(local, &regs->LocalCtrl);
			mb();
		}
	}

	/*
	 * Drive DATA high for one extra clock pulse, then stop.
	 * NOTE(review): presumably the "no acknowledge" that ends an I2C
	 * read -- confirm against the EEPROM datasheet.
	 */
	local |= EEPROM_DATA_OUT;
	writel(local, &regs->LocalCtrl);
	udelay(1);
	writel(readl(&regs->LocalCtrl) | EEPROM_CLK_OUT, &regs->LocalCtrl);
	udelay(2);
	writel(readl(&regs->LocalCtrl) & ~EEPROM_CLK_OUT, &regs->LocalCtrl);
	eeprom_stop(regs);

	return result;
}



/******************************************************************************
 **** USD CONNECTION SETUP AND TEARDOWN
 */

/*
 * Ask the firmware to open a new USD connection (channel).
 *
 * @dev:            the AceNIC interface.
 * @buffer, @size:  host memory region for the endpoint; must be non-NULL
 *                  and at least USD_ENDPOINT_MIN_SIZE bytes.  The leading
 *                  struct usd_endpoint is zeroed here.
 * @callback:       invoked (with @argument and the channel id) when the
 *                  firmware reports the channel as added.
 * @rx_irq_call, @tx_irq_call, @teardown_call:
 *                  per-channel callbacks, all invoked with @argument.
 * @src_ip_addr, @dest_ip_addr, @src_port, @dest_port:
 *                  connection 4-tuple in network byte order.
 * @ip_proto:       IPPROTO_TCP or IPPROTO_UDP (only validated here).
 * @dest_mac_addr:  not used by this function.
 *
 * Returns 0 once the open command has been issued (completion is
 * asynchronous), -EINVAL on bad arguments, -EAGAIN if the per-connection
 * state cannot be allocated, or -EBUSY if no channel id is free.
 */
int
acenic_new_connection (struct net_device *dev, 
		       void *buffer,
		       u32 size,
		       usd_setup_callback_t *callback,
		       void *argument,
		       usd_irq_callback_t *rx_irq_call,
		       usd_irq_callback_t *tx_irq_call,
		       usd_teardown_callback_t *teardown_call,
		       u8   *dest_mac_addr,
		       u32   src_ip_addr,
		       u32   dest_ip_addr,
		       u16   src_port,
		       u16   dest_port,
		       u8    ip_proto)
{
    usd_endpoint_t             *ep = buffer;
    struct ace_private         *ap = dev->priv;
    struct ace_connection_info *cip;
    struct cmd                 cmd;
    unsigned long              flags;
    u16                        id;

    /* Sanity check args */
    if (callback == NULL 
	|| rx_irq_call == NULL
	|| tx_irq_call == NULL
	|| teardown_call == NULL
	|| buffer == NULL
	|| (size < USD_ENDPOINT_MIN_SIZE)
	|| (ip_proto != IPPROTO_TCP && ip_proto != IPPROTO_UDP))
    {
	/* Terminate the line so the next log message is not glued on. */
	printk(KERN_ERR "%s: bogus args to " __FUNCTION__ "().\n", dev->name);
	return -EINVAL;
    }

    /* Get some space for the state we keep on this connection */
    if (!(cip = kmalloc(sizeof(struct ace_connection_info), 
			GFP_KERNEL | GFP_DMA)))
    {
	/* sizeof yields size_t; cast so the argument matches %d. */
	printk(KERN_ERR "%s: can't kmalloc() new connection's state (size %d)\n",
	       dev->name, (int)sizeof(struct ace_connection_info) ); 
	return -EAGAIN;
    }

    memset(cip, 0, sizeof(struct ace_connection_info));
    memset(ep, 0, sizeof(struct usd_endpoint));
    
    /* Fill in the kernel state for this connection: */
    /* The range of valid host addresses */
    set_aceaddr(&cip->range_base, buffer);
    cip->range_length = size;

    /* The TX2/RX2 rings */
    set_aceaddr(&cip->tx2_ring_ptr, ep->tx2_ring);
    set_aceaddr(&cip->rx2_ring_ptr, ep->rx2_ring);

    cip->tx2_ring_size = TX2_RING_ENTRIES;    //XXXXXXX
    cip->rx2_ring_size = RX2_RING_ENTRIES;    //XXXXXXX

    /* What packets must look like: 
     *
     * We get the addresses and port numbers in net-order (big endian),
     * but they need to be host-endian as they cross the PCI bus so they
     * will get made big-endian again properly on the card... 
     */
    cip->source_ip_addr   = ntohl(src_ip_addr);
    cip->dest_ip_addr     = ntohl(dest_ip_addr);
    cip->source_port      = ntohs(src_port);
    cip->dest_port        = ntohs(dest_port);

    /* For internal consumption */
    cip->callback_arg = argument;
    cip->setup_callback_fn    = callback;
    cip->rx_irq_callback_fn   = rx_irq_call;
    cip->tx_irq_callback_fn   = tx_irq_call;
    cip->teardown_callback_fn = teardown_call;

    /* From here on in there's a risk of races with other setups/teardowns */
    spin_lock_irqsave(&ap->lock, flags); 

    /* Find the first free connection number */
    for ( id = 1; id < USD_CHANNELS; id++ ) /* we know '0' is always busy */
    {
	if ( ap->conn_state[id] == NULL ) break;
    }
    if ( id == USD_CHANNELS )
    {
	printk(KERN_ERR "%s: too many connections open\n", dev->name);
	spin_unlock_irqrestore(&ap->lock, flags);
	kfree(cip);
	return -EBUSY;
    }

    /* Zero out the indices in the shared area */
    memset( &(ap->regs->usd2_ctrl[id]), 0, sizeof(ap->regs->usd2_ctrl[id]) );

    /* Interrupts for this channel stay masked until it is primed. */
    clear_bit(id%32, &ap->rx_int_mask[id/32]); 
    clear_bit(id%32, &ap->tx_int_mask[id/32]); 

    /* Register the state. */
    ap->conn_state[id] = cip;

    /* Using code and index together for the (16 bit) id field */
    cmd.evt  = C_OPEN_USD_CONNECTION;
    cmd.code = (id & 0xfff000) >> 12;
    cmd.idx  = id & 0xfff;
    /* Only the part of *cip before `shared_end' is passed to the card. */
    ace_issue_cmd_with_args(
        ap->regs, &cmd, (u32*)cip, 
        offsetof(struct ace_connection_info, shared_end) / 4);

    /* Race points passed successfully */
    spin_unlock_irqrestore(&ap->lock, flags);

    return 0;
}


static void add_connection_completion(struct net_device *dev, int id)
{
    /* Card has signalled completion of the USD connection setup.  
     * We need to invoke the callback */
    struct ace_private *ap = dev->priv;
    
    if ( ap->conn_state[id]->setup_callback_fn )
    {
	printk("%s: invoking callback\n", dev->name);
	(*ap->conn_state[id]->setup_callback_fn)
            (ap->conn_state[id]->callback_arg, id);
    } 
}


/*
 * Start tearing down USD channel `id'.
 *
 * Channel 0 and any id that is out of range or not currently open are
 * rejected with -EINVAL.  Otherwise the channel's entry is removed from the
 * packet filter; the result of that request is returned (0 on success).
 */
int acenic_del_connection (struct net_device *dev, int id)
{
    struct ace_private *priv = dev->priv;
    int rc;

    if ( !((id > 0) && (id < USD_CHANNELS) && (priv->conn_state[id] != NULL)) )
    {
	printk(KERN_ERR "%s: bad channel # for delete: %i\n", dev->name, id);
	return -EINVAL;
    }

    rc = ace_del_connection_filter(dev, id);
    if ( rc == 0 )
    {
	printk("%s: deleting channel %i\n", dev->name, id);
    }

    return rc;
}

static void del_connection_completion(struct net_device *dev, int id)
{
    struct ace_private *ap = dev->priv;
    struct cmd cmd;

    cmd.evt  = C_CLOSE_USD_CONNECTION;
    cmd.code = (id & 0xfff000) >> 12;
    cmd.idx  = id & 0xfff;

    ace_issue_cmd(ap->regs, &cmd);
}


#ifdef ACENIC_DEBUGGING_IOCTLS

/*
 *  I/F for debugging firmware through ioctls. We provide 2
 * ioctls, to read and write 32-bit values from the shared memory */

static int ace_do_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
{
    struct ace_private *ap;
    struct acenic_ioc_req rq;
    __volatile__ u32 *ptr;
    unsigned long flags;

    /* The Alteon card has a DMA engine on it -- allowing unprivileged
     * users to monkey with it would be foolish :)
     */
    if (current->euid != 0)
        return -EPERM;
    
    ap = (struct ace_private *)dev->priv;
    if (!ap) return -ENODEV;
    
    /* Go get the arguments */
    if (copy_from_user(&rq, ifr->ifr_data, sizeof(rq)))
        return -EFAULT;

    /* Check the arguments */
    if (((rq.cardoffset & 0x3)
        /* Not 32-bit aligned */
        || (rq.cardoffset > 0x3FFc)))   
        /* Larger than shared address space */
        return -EINVAL;

    /* Work out where in our memory that offset is */
    ptr  = ((u32 *)ap->regs);
    ptr += (rq.cardoffset / 4);
        
    switch(cmd) 
    {
    case ACENIC_IOCTL_READ_SMEM:
        /* Read data from the card. */
        spin_lock_irqsave(&ap->lock, flags);       
        rq.data = *ptr;
        spin_unlock_irqrestore(&ap->lock, flags);
        /* Send it back to the user */
        if (copy_to_user(ifr->ifr_data, &rq, sizeof(rq)))
            return -EFAULT;
        break;

    case ACENIC_IOCTL_WRITE_SMEM: 
        /* Send data to the card */
        spin_lock_irqsave(&ap->lock, flags);
        *ptr = rq.data;
        rq.data = *ptr;
        spin_unlock_irqrestore(&ap->lock, flags);       
        /* Send it back to the user */
        if (copy_to_user(ifr->ifr_data, &rq, sizeof(rq)))
            return -EFAULT;
        break;

    default:
        return -EOPNOTSUPP;
    }

    return 0;
}

#endif /* ACENIC_DEBUGGING_IOCTLS */

/*
 * Add the filter described by `ir' for channel `id' to the packet filter
 * and hand the regenerated filter code to the card.
 *
 * Only one filter update may be in flight: ap->filter_lock (bit 0) is taken
 * here and, on success, is left set until the firmware's "filter installed"
 * event clears it.  On failure the lock is released before returning.
 *
 * Returns 0 on success, -EAGAIN if an update is already in progress,
 * -ENOMEM if dpf_insert() fails, or the error from ace_install_filter().
 */
int ace_install_connection_filter(struct net_device *dev, 
                                  int id, struct dpf_ir *ir)
{
    struct ace_private *priv = dev->priv;
    unsigned long irqflags;
    unsigned long t_start, t_spent;
    u32 *code, nbytes;
    int rc;

    spin_lock_irqsave(&priv->lock, irqflags); 

    if ( test_and_set_bit(0, &priv->filter_lock) ) 
    {
        printk("Filter already being processed -- bug out\n");
        rc = -EAGAIN;
        goto unlock;
    }
    priv->filter_id = 0; /* Only non-zero if deleting a connection! */

    /* Time the filter compilation with the cycle counter. */
    rdtscl(t_start);
    if ( dpf_insert(ir, id) ) 
    {
        rc = -ENOMEM;
        goto release;
    }
    rdtscl(t_spent); t_spent -= t_start;
    code = (u32 *)dpf_iptr;

    /* The code ends at the first run of four zero words (4096-word cap). */
    nbytes = 0;
    while ( (nbytes < 4096) &&
            (code[nbytes] || code[nbytes+1] || 
             code[nbytes+2] || code[nbytes+3]) )
    {
        nbytes++;
    }
    nbytes = (nbytes+1) * 4; /* words -> bytes, including one zero word */

    printk("AddConnFilter: Code at %p, length %d bytes, time %ld cycles\n", code, nbytes, t_spent);

    rc = ace_install_filter(dev, (char *)code, nbytes);
    if ( rc == 0 )
        goto unlock;    /* filter_lock stays held until the card answers */

    /* Upload failed: take the new entry back out again. */
    dpf_delete(id);

release:
    priv->filter_lock = 0;
unlock:
    spin_unlock_irqrestore(&priv->lock, irqflags);
    return rc;
}

/*
 * Remove channel `id' from the packet filter and hand the regenerated
 * filter code to the card.
 *
 * ap->filter_id is set to `id' so that the "filter installed" event can go
 * on to close the connection.  As with installation, ap->filter_lock
 * (bit 0) is held across the asynchronous update and only released here on
 * failure.
 *
 * Returns 0 on success, -EAGAIN if an update is already in progress, or the
 * error from ace_install_filter().
 */
int ace_del_connection_filter(struct net_device *dev, int id)
{
    struct ace_private *priv = dev->priv;
    unsigned long irqflags;
    u32 *code, nbytes;
    int rc;

    spin_lock_irqsave(&priv->lock, irqflags); 

    if ( test_and_set_bit(0, &priv->filter_lock) ) 
    {
        printk("Filter already being processed -- bug out\n");
        rc = -EAGAIN;
        goto unlock;
    }
    priv->filter_id = id;

    dpf_delete(id);
    code = (u32 *)dpf_iptr;

    /* The code ends at the first run of four zero words (4096-word cap). */
    nbytes = 0;
    while ( (nbytes < 4096) &&
            (code[nbytes] || code[nbytes+1] || 
             code[nbytes+2] || code[nbytes+3]) )
    {
        nbytes++;
    }
    nbytes = (nbytes+1) * 4; /* words -> bytes, including one zero word */

    printk("DelConnFilter: Code at %p, length %d bytes\n", code, nbytes);

    rc = ace_install_filter(dev, (char *)code, nbytes);
    if ( rc != 0 )
    {
        /* Upload failed: nothing is in flight, so drop the lock. */
        priv->filter_lock = 0;
    }

unlock:
    spin_unlock_irqrestore(&priv->lock, irqflags);
    return rc;
}

static int ace_install_filter(struct net_device *dev, char *filter, int len)
{
    struct cmd                 cmd;
    struct ace_private         *ap = dev->priv;
    int id = 0;
    char *kbuf;
    unsigned long addr;
    
    if( (kbuf = kmalloc(len, GFP_KERNEL)) == NULL )
    {
        printk(KERN_ERR "%s: Failed to upload Filter for id %i "
               "due to kmalloc failure.\n", dev->name, id);
        return -ENOMEM;
    }
    
    memcpy(kbuf, filter, len);
    
    addr = virt_to_bus((void *)kbuf);
    
    //XXXX FIX :: Remember to free kbuf later!!!
#if (BITS_PER_LONG == 64)
    ap->regs->NewRXFilterHi  = (u32)(((unsigned long)addr)>>32);
#else
    ap->regs->NewRXFilterHi  = 0;
#endif
    ap->regs->NewRXFilterLo  = (u32)addr;
    ap->regs->NewRXFilterLen = len;

    /* Using code and index together for the (16 bit) id field */
    cmd.evt  = C_USD_INSTALL_FILTER;
    cmd.code = (id & 0xfff000) >> 12;
    cmd.idx  = id & 0xfff;
    ace_issue_cmd(ap->regs, &cmd);
    
    return 0;
}


/*
 * XXXKAF: never read at run time.  Taking the address of each of these
 * filter routines is just a way of making sure they get linked in from
 * libdpf_kern.a.
 */
void *__dpf_dummy[] = {
    dpf_begin,
    dpf_mkeq,
    dpf_mkshift,
    dpf_shifti,
    dpf_insert,
    dpf_delete
};

