/******************************************************************************
 * skbuff.c
 * 
 *   Provides bottom-layer support for the Linux TCP/IP stack in user space.
 *   Does this via the Linux sk_buff structure.
 * 
 *   Copyright (c) 1999-2000, K A Fraser
 * 
 *   NB. None of the functions defined here are based on any version of the
 *       Linux kernel.
 */

#include <netinet/in.h>
#include "thread.h"
#include "hash.h"
#include <stdlib.h>
#include <errno.h>
#include "private.h"
#define sk_buff u_sk_buff
#define socket  u_socket

/*
 * SOCK_LOCKED(sk):
 *   Allows us to assert that TCP has called us with an exclusive lock on
 *   the connection. This is good as we will mess with free lists, etc.
 *
 *   Evaluates to true if <sk> is NULL, or if the socket's mutex state has
 *   PTH_MUTEX_LOCKED set and the mutex owner is the calling thread (as
 *   reported by pth_self()).
 *
 *   NB. <sk> is evaluated more than once, so pass an expression with no
 *       side effects.
 */
#define SOCK_LOCKED(sk) \
    ( ((sk) == NULL) || \
      (((sk)->lock.m.mx_state & PTH_MUTEX_LOCKED) && \
       ((sk)->lock.m.mx_owner == pth_self())) )


/******************************************************************************
 * init_locked_tx_mem:
 *   Allocate and initialise the tx free lists for a connection.
 *
 *   Assumes <num_hdr_bufs> header spaces, starting at <hdr_space>, each of
 *   <hdr_size> bytes in length.
 *
 *   Also assumes a data area starting at <data_space>, of length <data_size>
 *
 *   MULTI-THREAD SAFE: called before pcb added to connection hash table.
 */
int init_locked_tx_mem(struct user_pcb *pcb, 
                       int num_hdr_bufs, 
                       int hdr_size, 
                       u_char *hdr_space,
                       int data_size,
                       u_char *data_space)
{
    int i;
    u_char *hdr, *data;

    /*
     * Link the header blocks together on a free list.
     */
    pcb->tx_free_hdr_list = (mem_block_t *)hdr_space;
    hdr = hdr_space;
    for ( i = 0; i < num_hdr_bufs; i++ )
    {
        mem_block_t *blk = (mem_block_t *)hdr;
        hdr += hdr_size;
        blk->next = (mem_block_t *)hdr;
    }
    ((mem_block_t *)(hdr - hdr_size))->next = NULL;
    
    /*
     * Hmmm... the data area could be done as a heap with variable-sized
     * blocks (perhaps a first-fit boundary-tag mechanism or similar) but
     * a more complex scheme will probably eventually be implemented, so let's
     * keep it easy for now :) XXX FIXED-SIZE BLOCKS!
     */
    pcb->tx_free_data_list = (mem_block_t *)data_space;
    data = data_space;
    for ( i = 0; i < (data_size / BYTES_PER_BUFFER); i++ )
    {
        mem_block_t *blk = (mem_block_t *)data;
        data += BYTES_PER_BUFFER;
        blk->next = (mem_block_t *)data;
    }
    ((mem_block_t *)(data - BYTES_PER_BUFFER))->next = NULL;

    /*
     * Initialise the tx queue.
     */
    skb_queue_head_init(&pcb->tx_queued_skbuffs);

    return(0);
}


/******************************************************************************
 * alloc_fixed_header:
 *   Allocate a pinned down header buffer.
 *
 *   Takes a block off the connection's tx header free list
 *   (pcb->tx_free_hdr_list) using REMOVE_FROM_MEM_LIST and returns it.
 *
 *   NOTE(review): what is returned when the free list is empty depends on
 *   REMOVE_FROM_MEM_LIST, which is defined elsewhere -- presumably NULL;
 *   confirm before relying on it.
 *
 *   MULTI-THREAD SAFE: called from TCP/IP with socket locked.
 */
u_char *alloc_fixed_header(struct user_pcb *pcb)
{
    /*
     * Not referenced directly in this function. NOTE(review): possibly used
     * by the expansion of REMOVE_FROM_MEM_LIST -- confirm before removing.
     */
    mem_block_t *blk;

    /* The free list is modified below, so the caller must hold the lock. */
    ASSERT(SOCK_LOCKED(pcb->sk));        

    return(REMOVE_FROM_MEM_LIST(pcb->tx_free_hdr_list));
}


/******************************************************************************
 * free_fixed_header:
 *   Frees a header buffer allocated with 'alloc_fixed_header'.
 *
 *   Puts <hdr> back on the connection's tx header free list
 *   (pcb->tx_free_hdr_list). The buffer's own storage is reused as the
 *   list node, so its contents must not be relied upon afterwards.
 *
 *   MULTI-THREAD SAFE: called with socket locked.
 */
void free_fixed_header(struct user_pcb *pcb, u_char *hdr)
{
    /* The free list is modified below, so the caller must hold the lock. */
    ASSERT(SOCK_LOCKED(pcb->sk));

    /*
     * Sanity check: <hdr> must lie at or after offset TX_HEADER of the
     * shared data area, and before the end of that area.
     * NOTE(review): the upper bound is the end of the whole shared area;
     * whether the tx header region really extends that far is not visible
     * from this file.
     */
    ASSERT(hdr >= (u_char *)(pcb->shared_data_area + TX_HEADER));
    ASSERT(hdr <  (u_char *)(pcb->shared_data_area + SHARED_DATA_SIZE));

    ADD_TO_MEM_LIST(pcb->tx_free_hdr_list, (mem_block_t *)hdr);
}


/******************************************************************************
 * alloc_fixed_data:
 *   Allocate a pinned down data buffer.
 *
 *   Takes a block off the connection's tx data free list
 *   (pcb->tx_free_data_list) using REMOVE_FROM_MEM_LIST and returns it.
 *   <size> is only checked (by assertion) against BYTES_PER_BUFFER; it does
 *   not otherwise influence which block is returned.
 *
 *   NOTE(review): what is returned when the free list is empty depends on
 *   REMOVE_FROM_MEM_LIST, which is defined elsewhere -- presumably NULL;
 *   confirm before relying on it.
 *
 *   MULTI-THREAD SAFE: called from TCP/IP with socket locked.
 */
u_char *alloc_fixed_data(struct user_pcb *pcb, int size)
{
    /*
     * Not referenced directly in this function. NOTE(review): possibly used
     * by the expansion of REMOVE_FROM_MEM_LIST -- confirm before removing.
     */
    mem_block_t *blk;

    /* The free list is modified below, so the caller must hold the lock. */
    ASSERT(SOCK_LOCKED(pcb->sk));        
    /* A request can never be larger than one fixed-size block. */
    ASSERT(size <= BYTES_PER_BUFFER);

    return(REMOVE_FROM_MEM_LIST(pcb->tx_free_data_list));
}


/******************************************************************************
 * free_fixed_data:
 *   Frees a data buffer allocated with 'alloc_fixed_data'.
 *
 *   Puts <data> back on the connection's tx data free list
 *   (pcb->tx_free_data_list). The buffer's own storage is reused as the
 *   list node, so its contents must not be relied upon afterwards.
 *
 *   <size> is accepted but not used by this function.
 *
 *   MULTI-THREAD SAFE: called with socket locked.
 */
void free_fixed_data(struct user_pcb *pcb, u_char *data, int size)
{

    /* The free list is modified below, so the caller must hold the lock. */
    ASSERT(SOCK_LOCKED(pcb->sk)); 

    /*
     * Sanity check: <data> must lie between offsets TX_DATA (inclusive) and
     * RX_HEADER (exclusive) of the shared data area.
     */
    ASSERT(data >= (u_char *)(pcb->shared_data_area + TX_DATA));
    ASSERT(data <  (u_char *)(pcb->shared_data_area + RX_HEADER));

    ADD_TO_MEM_LIST(pcb->tx_free_data_list, (mem_block_t *)data);
}


/******************************************************************************
 * upload_rx_buf:
 *   Hands the receive buffer(s) behind <skb> back to the connection's rx
 *   queue, then pushes the newly queued buffers to the NIC.
 *
 *   With HDR_SPLIT defined, the header buffer (skb->h_head, MAX_HEADER_LEN
 *   bytes) and the data buffer (skb->d_head, BYTES_PER_BUFFER bytes) are
 *   queued separately. Otherwise a single buffer of BYTES_PER_BUFFER bytes
 *   starting at skb->h_head is queued.
 *
 *   MULTI-THREAD SAFE: called with socket locked, or in BH.
 */
void upload_rx_buf(struct user_pcb *pcb, struct sk_buff *skb)
{
#ifdef HDR_SPLIT
    /* Split mode: header and payload live in separate buffers. */
    usd_add_hdr_to_rx_queue(pcb->usd_conn, skb->h_head, MAX_HEADER_LEN);
    usd_add_data_to_rx_queue(pcb->usd_conn,
                             skb->d_head,
                             BYTES_PER_BUFFER,
                             TRUE);
#else
    /* Unified mode: one buffer, starting at the header. */
    usd_add_to_rx_queue(pcb->usd_conn,
                        skb->h_head,
                        BYTES_PER_BUFFER,
                        TRUE);
#endif

    /* Make the re-queued buffer(s) visible to the card. */
    usd_push_new_rx_bufs_to_nic(pcb->usd_conn);
}


/******************************************************************************
 * free_tx_skbs:
 *   Free any sk_buffs which have been put onto the network.
 *
 *   Asks the device layer how many tx descriptors it has finished with
 *   (usd_remove_from_tx_queue), then dequeues and frees sk_buffs from
 *   pcb->tx_queued_skbuffs until that many descriptors are accounted for.
 *   With HDR_SPLIT defined, an skb carrying payload accounts for two
 *   descriptors (header + data); otherwise each skb accounts for one.
 *
 *   Stops early if the queue runs dry before all reported descriptors have
 *   been matched, rather than dereferencing a NULL skb.
 *
 *   Returns the number of sk_buffs freed.
 */
int free_tx_skbs(struct user_pcb *pcb)
{
    int i, freed = 0;
    struct sk_buff *skb;

    i = usd_remove_from_tx_queue(pcb->usd_conn);

    while ( i > 0 )
    {
        skb = skb_dequeue(&pcb->tx_queued_skbuffs);

        /*
         * The device reported more completed descriptors than we have
         * queued skbs for. Nothing more can be freed, so bail out.
         */
        if ( skb == NULL ) break;

#ifdef HDR_SPLIT
        /* Header descriptor, plus a data descriptor if there was payload. */
        i -= (skb->d_data != skb->d_tail) ? 2 : 1;
#else
        i--;
#endif
        kfree_skb(skb);
        freed++;
    }

    FDB("freed %d skbs", freed);
    return(freed);
}


/******************************************************************************
 * request_tx_space_upcall: now only used by UDP.
 *
 *   Registers a tx callback request with the device layer. The value passed
 *   is half the current length of pcb->tx_queued_skbuffs, rounded up.
 */
void request_tx_space_upcall(struct user_pcb *pcb)
{
    usd_tx_req_callback(
        pcb->usd_conn,
        (pcb->tx_queued_skbuffs.qlen + 1) >> 1);
}


/******************************************************************************
 * tx_skb:
 *   Queue a filled skb on this connection's hardware transmit ring, kick
 *   the NIC, and remember the skb on pcb->tx_queued_skbuffs so that it can
 *   be released once transmission has completed.
 *
 *   Always returns 0.
 *
 *   MULTI-THREAD SAFE: called from TCP/IP with socket locked.
 */
int tx_skb(struct user_pcb *pcb, struct sk_buff *skb)
{
    int hdr_bytes     = skb->h_tail - skb->h_data;
    int payload_bytes = skb->d_tail - skb->d_data;

    ASSERT(SOCK_LOCKED(pcb->sk));

    FDB("entered (h_len=%d, p_len=%d, h=%p, p=%p)",
        hdr_bytes, payload_bytes, skb->h_data, skb->d_data);

    /*
     * At most two descriptors are consumed below (header and payload), so
     * two must be free. This is expected to hold because there are fewer
     * than half as many hbufs as tx descriptors: we always run out of the
     * former first.
     */
    ASSERT(usd_space_in_tx_queue(pcb->usd_conn) >= 2);

    /* Trace outgoing TCP segments: seq range, ack, window, flags, payload. */
    if ( skb->nh.iph->protocol == IPPROTO_TCP )
    {
        FDB_WRN("> %c %u:%u(%u)%s %u win %u (%c%c%c%c%c) -- %u ... %u",
                payload_bytes ? 'P' : '.',
                ntohl(skb->h.th->seq),
                ntohl(skb->h.th->seq) + payload_bytes,
                payload_bytes,
                skb->h.th->ack ? " ack" : "",
                ntohl(skb->h.th->ack_seq),
                ntohs(skb->h.th->window),
                skb->h.th->urg ? 'U' : ' ',
                skb->h.th->psh ? 'P' : ' ',
                skb->h.th->rst ? 'R' : ' ',
                skb->h.th->syn ? 'S' : ' ',
                skb->h.th->fin ? 'F' : ' ',
                payload_bytes ? skb->d_data[0]  : 0,
                payload_bytes ? skb->d_tail[-1] : 0);
    }

    /*
     * Fill in the card's transmit descriptors.
     */
#ifndef HDR_SPLIT
    /* One descriptor covering header and payload, starting at the header. */
    usd_add_to_tx_queue(pcb->usd_conn, skb->h_data,
                        hdr_bytes + payload_bytes);
#else
    /* Header first, then the payload (if any) in its own descriptor. */
    usd_add_to_tx_queue(pcb->usd_conn, skb->h_data, hdr_bytes);
    if ( payload_bytes )
    {
        usd_add_to_tx_queue(pcb->usd_conn, skb->d_data, payload_bytes);
    }
#endif
    usd_push_new_tx_bufs_to_nic(pcb->usd_conn);

    /*
     * Track the skb until the hardware is done with it.
     */
    skb_queue_tail(&pcb->tx_queued_skbuffs, skb);

    return(0);
}


/******************************************************************************
 * rx_skb:
 *   Called whenever a new packet arrives for a connection. This function
 *   packages it up into an skb, then passes it up into the TCP/IP stack.
 *
 *   Returns 0 once the skb has been handed to ip_rcv() (whose own result is
 *   ignored), or -1 if no filled buffer could be taken from the rx queue or
 *   no sk_buff head could be allocated.
 *
 *   MULTI-THREAD SAFE: called from bottom-half thread, of which there
 *                      is only one.
 */
extern kmem_cache_t *skbuff_head_cache;
int rx_skb(struct user_pcb *pcb)
{
    /* NB. 'length' and 'start' are declared but never used below. */
    int length, h_len, p_len;
    u_char *start, *h_start, *p_start;
    struct sk_buff *skb;
    /* Set only when a listener gets locked below; unlocked after ip_rcv(). */
    struct u_sock *sk_to_unlock = NULL;

    FDB("entered");

    /*
     * Attempt to get a filled buffer from the receive queue. A non-zero
     * result is treated as "nothing to receive".
     */
    if ( usd_remove_from_rx_queue(pcb->usd_conn,
                                  (void**)&h_start, &h_len,
                                  (void**)&p_start, &p_len) ) return(-1);

    /*
     * NOTE(review): if this allocation fails, the buffer just removed from
     * the rx queue is not handed back to the NIC in this function -- it
     * looks like it is lost. Confirm against the usd_* layer.
     */
    if ( (skb = __kmem_cache_alloc(skbuff_head_cache)) == NULL ) return(-1);

    /*
     * We may need to split off the header ourselves: a NULL h_start means
     * the whole frame was delivered in the payload buffer.
     *
     * The header is taken to be 14 bytes of link-level header (presumably
     * Ethernet), the IP header (ihl 32-bit words), and then either the TCP
     * header (doff 32-bit words) or, for anything that is not TCP, a
     * fixed-size UDP header.
     *
     * NOTE(review): the lengths read from the packet are not checked
     * against p_len, so p_len can go negative on a short or malformed frame.
     */
    if ( !h_start )
    {
        struct iphdr *iph = (struct iphdr *)(p_start + 14);

        h_start = p_start;
        h_len   = 14 + (iph->ihl<<2);
        if ( iph->protocol == IPPROTO_TCP )
        {
            h_len += ((struct tcphdr *)(p_start + h_len))->doff*4;
        }
        else
        {
            h_len += sizeof(struct udphdr);
        }
        p_len   -= h_len;
        p_start += h_len;
    }

    /*
     * Data pointers. Both areas are marked completely full: head == data
     * and tail == end, so there is no headroom or tailroom.
     */
    skb->h_head = skb->h_data = h_start;
    skb->h_tail = skb->h_end  = (u_char *)h_start + h_len;
    skb->d_head = skb->d_data = p_start;
    skb->d_tail = skb->d_end  = (u_char *)p_start + p_len;

    /*
     * Lengths.
     */
    skb->len      = h_len + p_len;
    skb->truesize = skb->len + sizeof(struct sk_buff);

    /*
     * Header pointers: link-level header at the start of the header area,
     * network (IP) header 14 bytes in, transport header after the IP header.
     */
    skb->mac.raw = skb->h_data;
    skb->nh.raw  = skb->h_data + 14;
    skb->h.raw   = skb->h_data + 14 + skb->nh.iph->ihl*4;

    /*
     * Patch in connection's stack state.
     */
    skb->sk = skb->data_sk = pcb->sk; 
    ASSERT(skb->sk != NULL);

    /*
     * If this is a listening socket, check the data is not for one of 
     * its active connections. No need to lock the list here, as we don't
     * modify it.
     *
     * A connection matches when its remote address and port equal the
     * packet's source address and source port. On a match, skb->sk is
     * redirected to that connection; skb->data_sk is left pointing at the
     * listener.
     */
    if ( test_bit(STATE_LISTENING, &(pcb->state)) )
    {
        struct u_sock *sk;
        for ( sk = pcb->sk->lnext; sk != NULL; sk = sk->lnext )
        {
            if ( (sk->daddr == skb->nh.iph->saddr) &&
                 (sk->dport == skb->h.th->source) )
            {
                /*
                 * Lock the listener, as we will be using its data area as the
                 * skb passes through the stack. At this point skb->sk is
                 * still the listener; remember it so it can be unlocked
                 * after ip_rcv() returns.
                 */
                bh_lock_sock(sk_to_unlock = skb->sk);
                skb->sk = sk;
                break;
            }
        }
    }

    /* Others: single reference to head and data; flag as a receive buffer. */
    atomic_set(&skb->users, 1);
    atomic_set(skb_datarefp(skb), 1);
    skb->rx_buf = 1;

#ifndef NDEBUG
    /* Trace incoming TCP segments: seq range, ack, window, flags, payload. */
    if ( skb->nh.iph->protocol == IPPROTO_TCP )
        FDB_WRN("< %c %u:%u(%u)%s %u win %u (%c%c%c%c%c) -- %u ... %u",
                p_len ? 'P' : '.', 
                ntohl(skb->h.th->seq), 
                ntohl(skb->h.th->seq) + p_len, 
                p_len,
                skb->h.th->ack?" ack":"", ntohl(skb->h.th->ack_seq), 
                ntohs(skb->h.th->window),
                skb->h.th->urg?'U':' ',
                skb->h.th->psh?'P':' ',
                skb->h.th->rst?'R':' ',
                skb->h.th->syn?'S':' ',
                skb->h.th->fin?'F':' ',
                p_len ? skb->d_data[0]  : 0, 
                p_len ? skb->d_tail[-1] : 0);
#endif

#ifdef __TEST_NO_CHECKSUMS
    skb->ip_summed = 2; // CHECKSUM_UNNECESSARY
#warning "XXXX Checksumming disabled on receive XXXX"
#endif

    /* Pass up to TCP/IP. */
    ip_rcv(skb, NULL, NULL);

    /* Release the listener if it was locked for the trip through the stack. */
    if ( sk_to_unlock ) bh_unlock_sock(sk_to_unlock);

    return(0); 
}


/******************************************************************************
 * detach_sock_from_nic:
 *   Sever a PCB's links to the NIC and to the protocol stack.
 *
 *   Closes the device connection (if there is one) while holding the
 *   socket's bottom-half lock (if there is a socket), then clears pcb->sk.
 *   Finally, the PCB itself is freed if it is not found in the 'pcbs' table
 *   under its uid.
 *
 *   A NULL <pcb> is ignored.
 */
void detach_sock_from_nic(struct user_pcb *pcb)
{
    if ( pcb == NULL )
    {
        return;
    }

    /*
     * Detach from NIC, under the socket lock when a socket is attached.
     */
    if ( pcb->sk != NULL )
    {
        bh_lock_sock(pcb->sk);
    }

    if ( pcb->usd_conn != NULL )
    {
        usd_close_device_connection(pcb->usd_conn);
        pcb->usd_conn = NULL;
    }

    if ( pcb->sk != NULL )
    {
        bh_unlock_sock(pcb->sk);
        pcb->sk = NULL; /* no more upcalls to stack */
    }

    /*
     * Clean up PCB resources if not still attached to application layer.
     */
    if ( !element_for_key(pcbs, pcb->uid) )
    {
        free(pcb);
    }
}


/******************************************************************************
 * pcb_from_sock:
 *   Returns the PCB that PCB_FROM_SOCKET yields for <sock>, after first
 *   setting that PCB's 'sk' field to sock->sk.
 */
struct user_pcb *pcb_from_sock(struct socket *sock)
{
    struct user_pcb *pcb;

    pcb     = PCB_FROM_SOCKET(sock);
    pcb->sk = sock->sk;

    return(pcb);
}
