#define _GNU_SOURCE
#include <sys/fcntl.h>
#include <sys/unistd.h>
#include <assert.h>
#include <err.h>
#include <pcap.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

/*
 * Hand-declared fixed-width aliases.  uint32_t, which the structures below
 * also use, is not declared here and so has to come from an included header.
 */
typedef unsigned char uint8_t;
typedef unsigned short uint16_t;

#include <netinet/in.h>
#include <linux/if_ether.h>
#include <linux/ip.h>
#include <linux/tcp.h>

/* Set from argv[1] in main(); process_packet() skips a packet when
   random() < RAND_MAX / packet_drop_rate. */
static unsigned
packet_drop_rate;
/* Stream opened on the file "summary"; receives raw struct conn_record
   entries from _close_connection(). */
static FILE *
summary_log;
/* Number of find_conn() calls. */
static unsigned
hash_lookups;
/* Number of hash chain entries examined by find_conn(). */
static unsigned long long
hash_comparisons;
/* Packets that got past the random drop and the Ethernet type check. */
static unsigned long long
packets_processed;
/* Packets skipped because their filter_bits bit was set. */
static unsigned long long
packets_bloomed;
/* Connections for which close_connection() wrote a dump file. */
static unsigned
connections_dumped;

/*
 * Summary entry for one direction of a connection.  _close_connection()
 * fills one in and fwrite()s it verbatim to summary_log when the direction
 * is not marked abnormal and has acked_fin set, so the in-memory layout of
 * this structure is also the file format.
 */
struct conn_record {
	unsigned long long duration; /* Microseconds */
	unsigned long long tot_bytes; /* Copied from tcpconn.tot_bytes */
	unsigned long long payload_bytes; /* tcpconn.fin_seq - tcpconn.syn_seq */
	uint32_t src; /* Copied from tcpconn.saddr */
	uint32_t dest; /* Copied from tcpconn.daddr */
	uint16_t sport; /* Copied from tcpconn.sport */
	uint16_t dport; /* Copied from tcpconn.dport */
};

/*
 * One stored packet, kept on a singly linked per-direction list
 * (tcpconn.first_seg .. tcpconn.last_seg).  Allocated by add_segment() as
 * sizeof(struct segment) + dlen bytes so the packet bytes follow the header.
 */
struct segment {
	struct segment *next;	/* Next stored packet, NULL at the tail */
	unsigned dlen;		/* Number of bytes in data[] */
	struct timeval ts;	/* Capture timestamp of the packet */
	unsigned char data[];	/* Packet bytes (C99 flexible array member) */
};

/*
 * Heuristic "is y later than x?" comparison for sequence numbers.
 *
 * Returns 1 when y is numerically greater than x.  Otherwise returns 1 only
 * when x lies in the top quarter of the 32-bit range (>= 0xc0000000) and y
 * in the bottom quarter (< 0x40000000), which is taken to mean that y has
 * wrapped around past x.  Returns 0 in every other case.
 */
static inline int
SEQ_GT(unsigned long y, unsigned long x)
{
	if (y > x)
		return 1;

	/* y <= x from here on: only a suspected wrap-around counts as "later". */
	return x >= 0xc0000000 && y < 0x40000000;
}

/* Whichever of x and y SEQ_GT() considers later (y when neither is).
   Each argument can be evaluated more than once, so avoid side effects. */
#define SEQ_MAX(x, y) (SEQ_GT(x,y) ? (x) : (y))

/* Error text buffer handed to pcap_open_offline(). */
static char
errbuf[PCAP_ERRBUF_SIZE];

/* Bitmap of bloom() hashes whose packets are currently being skipped:
   hash h lives in word h / 32, bit h % 32.  Bits are set when a connection
   is dropped or cannot be created, and the whole map is cleared again by
   advance_time(). */
static unsigned long
filter_bits[16384];

/*
 * State for one direction of a TCP connection.  The two directions are
 * allocated together by create_connection() and point at each other through
 * `companion`; each one sits on its own hash chain and on the global LRU list.
 */
struct tcpconn {
	struct tcpconn *companion; /* The other direction */

	/* prevp is the address of the pointer that refers to this entry
	   (a hash_table[] slot or the previous entry's next field). */
	struct tcpconn **prevp, *next; /* Hash table overflow */

	/* LRU list, most recently used first, ending at the lru_tail
	   sentinel; lru_prevp works the same way as prevp. */
	struct tcpconn **lru_prevp, *lru_next;

	/* Packets stored for this direction by add_segment(), oldest first. */
	struct segment *first_seg, *last_seg;

	uint32_t saddr; /* In network order */
	uint32_t daddr;
	uint16_t sport; /* As found in the TCP header, not byte-swapped */
	uint16_t dport;

	uint32_t seq; /* Host order */
	uint32_t hash; /* bloom() of the 4-tuple; hash_table/filter_bits index */
	uint32_t fin_seq; /* Sequence number field of the packet carrying FIN */
	uint32_t syn_seq; /* Value of seq when the SYN was processed */

	/* Copied into conn_record.tot_bytes.  NOTE(review): nothing in the
	   code visible here ever assigns this field - confirm intent. */
	unsigned long long tot_bytes;

	unsigned acked_fin:1,	/* This direction ACKed past the companion's FIN */
		 fin_sent:1,	/* A FIN was seen in this direction */
		 syn_sent:1,	/* A SYN was seen (or its absence was noted) */
		 is_secondary:1, /* Set on the companion half by create_connection() */
		 interesting:1,	/* Dump the connection when it is closed */
		 abnormal:1,	/* SACK seen or no SYN seen: no storing, summary or dump */
		 hold:1;	/* add_segment() is allocating: release_memory() must stop here */
};

/* Connection lookup table indexed by tcpconn.hash; entries that share a
   slot are chained through tcpconn.next. */
static struct tcpconn *
hash_table[65536];
/* Sentinel that terminates the LRU list; it is never a real connection. */
static struct tcpconn
lru_tail;
/* Most recently used connection, or &lru_tail while the list is empty. */
static struct tcpconn *
lru_head = &lru_tail;

/* Larger of x and y; the chosen argument is evaluated twice. */
#define MAX(x,y) ((x) > (y) ? (x) : (y))

/* Allocation budget in bytes: get_memory() refuses any request that would
   bring mem_in_use up to this value or beyond. */
#define MEM_THRESH (1500 * 1024 * 1024)

/* Connection halves currently allocated. */
static unsigned
nr_live_conns;
/* Connection halves allocated since the program started. */
static unsigned
nr_conns;
/* Bytes handed out by get_memory() and not yet given back. */
static unsigned
mem_in_use;

/*
 * Allocate `size` zero-filled bytes, charged against the MEM_THRESH budget.
 *
 * The request size is stored in an `unsigned` placed immediately before the
 * returned pointer so that release_mem() can refund it later.
 *
 * Returns NULL when the budget would be reached or when the underlying
 * allocation fails; in both cases nothing is charged to mem_in_use.  Callers
 * react to NULL by calling release_memory() and retrying.
 *
 * NOTE(review): the returned pointer is sizeof(unsigned) bytes past what
 * calloc() returned, so it is not guaranteed to be aligned for types wider
 * than `unsigned`.  Changing that requires updating release_mem() as well.
 */
static void *
get_memory(unsigned size)
{
	unsigned *b;

	if (mem_in_use + size >= MEM_THRESH)
		return NULL;
	b = calloc(1, size + sizeof(*b));
	if (!b)
		return NULL;
	/* Only account for memory that was really obtained. */
	mem_in_use += size;
	b[0] = size;
	return b + 1;
}

/*
 * Give back a block obtained from get_memory(): refund the size recorded in
 * the `unsigned` just before `x` to mem_in_use and free the whole allocation.
 */
static void
release_mem(void *x)
{
	unsigned *header = (unsigned *)x - 1;

	mem_in_use -= *header;
	free(header);
}

/*
 * Fold a connection 4-tuple into a 16-bit hash.
 *
 * The tuple is laid out as six 16-bit words (daddr and sport are byte-swapped
 * with ntohl()/ntohs() first, saddr and dport are used as given) and the
 * words are XORed together.  Because of the asymmetric swapping the two
 * directions of a connection generally hash to different values.
 */
static unsigned
bloom(uint32_t saddr, uint32_t daddr, uint16_t sport, uint16_t dport)
{
	uint16_t words[6];
	unsigned acc = 0;
	unsigned i;

	daddr = ntohl(daddr);
	sport = ntohs(sport);

	memcpy(&words[0], &saddr, sizeof(saddr));
	memcpy(&words[2], &daddr, sizeof(daddr));
	memcpy(&words[4], &sport, sizeof(sport));
	memcpy(&words[5], &dport, sizeof(dport));

	for (i = 0; i < 6; i++)
		acc ^= words[i];
	return acc;
}

/*
 * Assert that `conn` is consistently linked into both its hash chain and the
 * LRU list: the pointers that are supposed to refer to it do, and its
 * successors point back at its own link fields.
 */
static void
sanity_check(const struct tcpconn *conn)
{
	/* Back-links: whatever prevp/lru_prevp address must point at us. */
	assert(*conn->prevp == conn);
	assert(*conn->lru_prevp == conn);

	/* Forward links: the hash chain may end, the LRU list always has a
	   successor. */
	if (conn->next != NULL) {
		assert(conn->next->prevp == &conn->next);
	}
	assert(conn->lru_next->lru_prevp == &conn->lru_next);
}

/* Write all `len` bytes of `buf` to `fd`, continuing after short writes.
   Returns 0 on success and -1 if write() fails or makes no progress. */
static int
dump_write(int fd, const void *buf, size_t len)
{
	const unsigned char *p = buf;

	while (len > 0) {
		ssize_t n = write(fd, p, len);
		if (n <= 0)
			return -1;
		p += n;
		len -= (size_t)n;
	}
	return 0;
}

/*
 * Append one stored packet to the dump file open on `fd`.
 *
 * A pcap savefile record starts with four 32-bit fields (seconds,
 * microseconds, captured length, original length).  The in-memory
 * struct pcap_pkthdr embeds a struct timeval, which is larger than that on
 * platforms with 64-bit time fields, so the record header is built
 * explicitly here instead of writing that structure out.
 *
 * Does nothing when `fd` is negative (the dump file could not be opened);
 * write failures are reported with warn() and otherwise ignored.
 */
static void
dump_segment(int fd, const struct segment *s)
{
	struct {
		uint32_t ts_sec;
		uint32_t ts_usec;
		uint32_t caplen;
		uint32_t len;
	} rec;

	if (fd < 0)
		return;

	rec.ts_sec = (uint32_t)s->ts.tv_sec;
	rec.ts_usec = (uint32_t)s->ts.tv_usec;
	rec.caplen = s->dlen;
	rec.len = s->dlen;

	if (dump_write(fd, &rec, sizeof(rec)) < 0 ||
	    dump_write(fd, s->data, s->dlen) < 0)
		warn("writing dump file");
}

static int
open_dumpfile(void)
{
	char buf[4096];
	static int next_index;
	int dump_fd;
	struct pcap_file_header pfh = {0};
	sprintf(buf, "dump_file%d", next_index++);
	dump_fd = open(buf, O_WRONLY | O_TRUNC | O_CREAT, 0600);
	pfh.magic = 0xa1b2c3d4;
	pfh.version_major = 2;
	pfh.version_minor = 4;
	pfh.snaplen = 65535;
	pfh.linktype = DLT_RAW;
	write(dump_fd, &pfh, sizeof(pfh));
	return dump_fd;
}

/*
 * Tear down one direction of a connection: unlink it from its hash chain and
 * from the LRU list, append a conn_record to summary_log if the direction
 * closed normally (not abnormal, acked_fin set), then free its stored
 * segments and the connection itself.
 *
 * The companion is not touched; close_connection() calls this once per
 * direction.
 */
static void
_close_connection(struct tcpconn *conn)
{
	struct segment *s, *n;

	sanity_check(conn);

	/* Unlink from the hash chain and the LRU list. */
	if (conn->next)
		conn->next->prevp = conn->prevp;
	conn->lru_next->lru_prevp = conn->lru_prevp;
	*conn->prevp = conn->next;
	*conn->lru_prevp = conn->lru_next;

	if (!conn->abnormal &&
	    conn->acked_fin) {
		struct conn_record cr;
		long long usecs;

		assert(conn->last_seg);
		assert(conn->first_seg);

		/* The record is written out byte for byte, so clear it
		   completely first: padding must not carry stack garbage. */
		memset(&cr, 0, sizeof(cr));

		/* Signed integer arithmetic throughout: the microsecond
		   difference on its own is frequently negative. */
		usecs = (long long)(conn->last_seg->ts.tv_sec -
				    conn->first_seg->ts.tv_sec) * 1000000LL;
		usecs += (long long)conn->last_seg->ts.tv_usec -
			(long long)conn->first_seg->ts.tv_usec;
		/* Timestamps that run backwards are reported as zero. */
		cr.duration = usecs < 0 ? 0 : (unsigned long long)usecs;

		cr.tot_bytes = conn->tot_bytes;
		cr.payload_bytes = conn->fin_seq - conn->syn_seq;
		cr.src = conn->saddr;
		cr.dest = conn->daddr;
		cr.sport = conn->sport;
		cr.dport = conn->dport;
		if (summary_log &&
		    fwrite(&cr, sizeof(cr), 1, summary_log) != 1)
			warn("writing summary record");
	}

	for (s = conn->first_seg; s; s = n) {
		n = s->next;
		release_mem(s);
	}
	release_mem(conn);
	nr_live_conns--;
}

static void
close_connection(struct tcpconn *conn)
{
	assert(conn->companion->companion == conn);
	if (conn->abnormal || conn->companion->abnormal)
		conn->interesting = conn->companion->interesting = 0;
	if (conn->interesting || conn->companion->interesting) {
		int fd;
		struct segment *s1, *s2;
		fd = open_dumpfile();
		s1 = conn->first_seg;
		s2 = conn->companion->first_seg;
		while (s1 && s2) {
			if (s1->ts.tv_sec < s2->ts.tv_sec ||
			    (s1->ts.tv_sec == s2->ts.tv_sec &&
			     s1->ts.tv_usec < s2->ts.tv_usec)) {
				dump_segment(fd, s1);
				s1 = s1->next;
			} else {
				dump_segment(fd, s2);
				s2 = s2->next;
			}
		}
		while (s1) {
			dump_segment(fd, s1);
			s1 = s1->next;
		}
		while (s2) {
			dump_segment(fd, s2);
			s2 = s2->next;
		}
		close(fd);
		connections_dumped++;
	}
	_close_connection(conn->companion);
	_close_connection(conn);
}

/*
 * Mark `t` as just used: move it to the front of its hash chain and to the
 * head of the LRU list.  `t` must already be linked into both.
 */
static void
make_recent(struct tcpconn *t)
{
	struct tcpconn **bucket = &hash_table[t->hash];

	/* Hash chain: take t out ... */
	if (t->next)
		t->next->prevp = t->prevp;
	*t->prevp = t->next;

	/* ... and put it back as the first entry of its bucket. */
	t->prevp = bucket;
	t->next = *bucket;
	if (t->next)
		t->next->prevp = &t->next;
	*bucket = t;

	/* LRU list: take t out (there is always a successor, at worst the
	   lru_tail sentinel) ... */
	t->lru_next->lru_prevp = t->lru_prevp;
	*t->lru_prevp = t->lru_next;

	/* ... and make it the new head. */
	t->lru_prevp = &lru_head;
	t->lru_next = lru_head;
	lru_head->lru_prevp = &t->lru_next;
	lru_head = t;
}

/*
 * Allocate a zero-filled pair of connection halves that refer to each other
 * through `companion`.  The second half is flagged is_secondary.
 *
 * Returns the first half, or NULL (with nothing left allocated) when
 * get_memory() cannot supply both.  The pair is not linked into the hash
 * table or the LRU list; that is left to the caller.
 */
static struct tcpconn *
create_connection(void)
{
	struct tcpconn *primary;
	struct tcpconn *secondary;

	primary = get_memory(sizeof(*primary));
	if (primary == NULL)
		return NULL;

	secondary = get_memory(sizeof(*secondary));
	if (secondary == NULL) {
		release_mem(primary);
		return NULL;
	}

	primary->companion = secondary;
	secondary->companion = primary;
	secondary->is_secondary = 1;

	nr_live_conns += 2;
	nr_conns += 2;
	return primary;
}

/*
 * Evict a connection: set the filter_bits bit for its hash so that further
 * packets with that hash are skipped until the filter is next cleared, then
 * close both directions.
 */
static void
drop_connection(struct tcpconn *p)
{
	/* 1UL, not 1: shifting a signed int into bit 31 is undefined. */
	filter_bits[p->hash / 32] |= 1UL << (p->hash % 32);
	close_connection(p);
}

/* Free up some memory by dropping the least recently used connections
   (at most 128 per call), oldest first.  Every dropped connection also
   gets its bloom filter bit set so that it is not immediately re-created.
   Stops early when the LRU list is empty or when the oldest remaining
   connection is marked hold.  Returns 1 if anything was dropped, else 0. */
static int
release_memory(void)
{
	unsigned dropped;

	for (dropped = 0; dropped < 128; dropped++) {
		struct tcpconn *victim;

		if (lru_head == &lru_tail)
			break;
		/* lru_tail.lru_prevp addresses the lru_next field of the
		   last real entry; step back to the start of that entry. */
		victim = (struct tcpconn *)((char *)lru_tail.lru_prevp -
					    offsetof(struct tcpconn, lru_next));
		if (victim->hold)
			break;
		drop_connection(victim);
	}
	return dropped != 0;
}

/*
 * Store a copy of a packet (`dlen` bytes at `data`, captured at `ts`) at the
 * end of conn's segment list.
 *
 * Nothing is stored for connections marked abnormal.  If memory is short,
 * older connections are released and the allocation retried; the packet is
 * silently not stored when nothing more can be released.  While allocating,
 * conn is flagged hold so that release_memory() does not drop it.
 */
static void
add_segment(struct tcpconn *conn,
	    const void *data,
	    unsigned dlen,
	    const struct timeval *ts)
{
	struct segment *seg;

	if (conn->abnormal)
		return;

	conn->hold = 1;
	while ((seg = get_memory(sizeof(*seg) + dlen)) == NULL) {
		if (!release_memory()) {
			conn->hold = 0;
			return;
		}
	}
	conn->hold = 0;

	seg->next = NULL;
	seg->dlen = dlen;
	seg->ts = *ts;
	memcpy(seg->data, data, dlen);

	/* Append: the first segment also becomes the list head. */
	if (conn->last_seg)
		conn->last_seg->next = seg;
	else
		conn->first_seg = seg;
	conn->last_seg = seg;
}

/* Insert a not yet linked connection half at the front of its hash bucket
   (selected by c->hash) and at the head of the LRU list. */
static void
link_new_conn(struct tcpconn *c)
{
	struct tcpconn **bucket = &hash_table[c->hash];

	c->next = *bucket;
	c->prevp = bucket;
	if (*bucket)
		(*bucket)->prevp = &c->next;
	*bucket = c;

	c->lru_next = lru_head;
	c->lru_prevp = &lru_head;
	lru_head->lru_prevp = &c->lru_next;
	lru_head = c;
}

/*
 * Look up the connection direction identified by the given 4-tuple in hash
 * bucket `hash`, creating it (together with its companion for the reverse
 * direction) if it does not exist yet.
 *
 * An existing entry and its companion are moved to the front of the LRU
 * list.  When creation runs out of memory, older connections are released
 * and creation retried.  Returns NULL if that still does not help.
 */
static struct tcpconn *
find_conn(uint32_t hash, uint32_t saddr, uint32_t daddr,
	  uint16_t sport, uint16_t dport)
{
	struct tcpconn *t;
	struct tcpconn *peer;

	hash_lookups++;
	for (t = hash_table[hash]; t; t = t->next) {
		hash_comparisons++;
		if (t->saddr == saddr &&
		    t->daddr == daddr &&
		    t->sport == sport &&
		    t->dport == dport)
			break;
	}
	if (t) {
		make_recent(t);
		make_recent(t->companion);
		return t;
	}

	/* Not known yet: allocate a pair, evicting old entries if needed. */
	while ((t = create_connection()) == NULL) {
		if (!release_memory())
			return NULL;
	}
	peer = t->companion;

	t->saddr = saddr;
	t->daddr = daddr;
	t->sport = sport;
	t->dport = dport;
	t->hash = hash;

	/* The companion describes the same connection seen the other way. */
	peer->saddr = daddr;
	peer->daddr = saddr;
	peer->sport = dport;
	peer->dport = sport;
	peer->hash = bloom(daddr, saddr, dport, sport);

	link_new_conn(t);
	sanity_check(t);

	/* Linked second, so the companion ends up as the LRU head. */
	link_new_conn(peer);
	sanity_check(peer);
	sanity_check(t);

	return t;
}

/* Timestamp of the first packet handed to advance_time(). */
static struct timeval
trace_started;

/*
 * Return 1 when `tv` lies more than 30 seconds after trace_started,
 * 0 otherwise.
 */
static int
trace_body(const struct timeval *tv)
{
	time_t shifted_sec = tv->tv_sec - 30;

	if (shifted_sec != trace_started.tv_sec)
		return shifted_sec > trace_started.tv_sec;
	/* Same second once shifted: the microseconds decide. */
	return tv->tv_usec > trace_started.tv_usec;
}

/*
 * Feed the timestamp of the packet being processed into the time-driven
 * state.  The first timestamp ever seen is recorded as trace_started.
 * Whenever the timestamp has moved far enough past the previous reset
 * (see the condition below) filter_bits is cleared, so that hashes blocked
 * by the filter are let through again.
 */
static void
advance_time(const struct timeval *tv)
{
	static struct timeval last_tv;
	static unsigned called;
	int reopen;

	if (!called) {
		called = 1;
		trace_started = *tv;
	}

	/* Once a second, open the filter up again */
	reopen = tv->tv_sec > last_tv.tv_sec &&
		tv->tv_usec > last_tv.tv_usec;
	if (!reopen)
		reopen = tv->tv_sec > last_tv.tv_sec + 1;
	if (!reopen)
		return;

	memset(filter_bits, 0, sizeof(filter_bits));
	last_tv = *tv;
}

static void
process_packet(unsigned char *ignore,
	       const struct pcap_pkthdr *hdr,
	       const unsigned char *data)
{
	struct ethhdr *eh;
	struct iphdr *iph;
	struct tcphdr *tph;
	struct tcpconn *conn;
	unsigned bloom_hash;
	void *data_start;
	unsigned tcp_bytes;

	if (random() < (RAND_MAX / packet_drop_rate))
		return;

#if 1
	eh = (struct ethhdr *)data;
	iph = (struct iphdr *)((void *)(eh + 1));
	if (ntohs(eh->h_proto) != ETH_P_IP) {
		printf("Not IP?\n");
		return; /* Ignore */
	}
#else
	iph = (struct iphdr *)data;
#endif

	advance_time(&hdr->ts);

	packets_processed++;

	if (iph->protocol != IPPROTO_TCP) {
		return; /* Ignore */
	}
	if (iph->frag_off & htons(0x1fff)) {
		printf("Ignore frag: %x.\n", iph->frag_off);
		return;
	}
	tph = (void *)iph + iph->ihl * 4;
	bloom_hash = bloom(iph->saddr, iph->daddr, tph->source, tph->dest);
	if (filter_bits[bloom_hash / 32] & (1 << (bloom_hash % 32))) {
		packets_bloomed++;
		return;
	}
	conn = find_conn(bloom_hash,
			 iph->saddr, iph->daddr, tph->source, tph->dest);
	if (!conn) {
		filter_bits[bloom_hash / 32] |= 1 << (bloom_hash % 32);
		return;
	}
	if (tph->doff != sizeof(struct tcphdr) / 4) {
		/* Options. */
		unsigned char *opt_ptr;
		unsigned opt_avail;
		unsigned opt_consumed;
		opt_avail = tph->doff * 4 - sizeof(struct tcphdr);
		opt_ptr = (unsigned char *)(tph + 1);
		opt_consumed = 0;
		while (opt_consumed < opt_avail) {
			switch (opt_ptr[opt_consumed]) {
			case 0:
				opt_consumed = opt_avail + 1;
				break;
			case 1:
				opt_consumed++;
				break;
			case 5:
				/* SACK -> can't handle it */
				conn->abnormal = 1;
				conn->companion->abnormal = 1;
			default:
				if (opt_ptr[opt_consumed + 1] == 0) {
					opt_consumed = opt_avail + 1;
				} else {
					opt_consumed += opt_ptr[opt_consumed+1];
				}
				break;
			}
		}
	}
	add_segment(conn, data, hdr->len, &hdr->ts);
	tcp_bytes = ntohs(iph->tot_len) - sizeof(*iph) - sizeof(*tph);
	conn->seq = SEQ_MAX(ntohl(tph->seq) + tcp_bytes, conn->seq);
	if (tph->ack) {
		if (conn->companion->syn_sent &&
		    SEQ_GT((unsigned long)ntohl(tph->ack_seq), conn->companion->seq + 2) &&
		    trace_body(&hdr->ts)) {
			conn->interesting = conn->companion->interesting = 1;
			conn->companion->seq = ntohl(tph->ack_seq);
		}
		if (conn->companion->fin_sent &&
		    SEQ_GT((unsigned long)ntohl(tph->ack_seq), conn->companion->fin_seq)) {
			conn->acked_fin = 1;
			if (conn->companion->acked_fin) {
				close_connection(conn);
				return;
			}
		}
	}
	if (tph->rst) {
		conn->interesting = 0;
		conn->companion->interesting = 0;
		close_connection(conn);
		return;
	}
	if (tph->fin) {
/*
		if (conn->acked_fin) {
			close_connection(conn);
			return;
		}
*/
		conn->fin_seq = ntohl(tph->seq);
		conn->fin_sent = 1;
		conn->seq++;
	}
	if (tph->syn) {
		conn->syn_sent = 1;
		conn->syn_seq = conn->seq;
		conn->seq++;
	}
	if (!conn->syn_sent) {
		conn->syn_sent = 1;
		conn->abnormal = conn->companion->abnormal = 1;
	}
}

/*
 * Run every packet of the capture file `name` through process_packet() and
 * print the running statistics afterwards.
 *
 * Exits the program if the file cannot be opened; a read error part-way
 * through the file is reported with a warning and processing continues with
 * whatever was read up to that point.
 */
static void
process_file(const char *name)
{
	pcap_t *p;

	printf("Process %s.\n", name);
	p = pcap_open_offline(name, errbuf);
	if (!p)
		errx(1, "openning %s: %s", name, errbuf);
	/* -1 is pcap_loop()'s error return; end of file yields 0. */
	if (pcap_loop(p, -1, process_packet, NULL) == -1)
		warnx("reading %s: %s", name, pcap_geterr(p));
	pcap_close(p);

	/* The counters are unsigned, so print them with unsigned formats. */
	printf("Hash goodness %f.\n", hash_lookups/(double)hash_comparisons);
	printf("Processed %llu packets, bloom rate %f.\n",
	       packets_processed, packets_bloomed / (double)packets_processed);
	printf("%u suspicious connections dumped.\n", connections_dumped);
	printf("Using %u(%f) bytes in %u/%u connections.\n",
	       mem_in_use, (double)mem_in_use / MEM_THRESH,
	       nr_live_conns, nr_conns);
}

/*
 * Read the next whitespace-delimited word from `f`.
 *
 * Returns a freshly allocated string that the caller must free(), or NULL
 * at end of input or on a read error.
 *
 * Uses the POSIX "m" allocation modifier.  The older GNU spelling "%as" is
 * parsed as a floating-point conversion when the program is built as C99 or
 * later, which would scribble a float over the pointer.
 */
static char *
readline(FILE *f)
{
	char *res = NULL;

	if (fscanf(f, "%ms", &res) != 1)
		return NULL;
	return res;
}

/*
 * Usage: prog drop_rate [pcap_file ...]
 *
 * drop_rate is a positive integer; roughly one packet in drop_rate is
 * skipped at random.  Capture files are taken from the command line or,
 * when none are given, read as whitespace-separated names from standard
 * input.  Summary records go to the file "summary" in the current
 * directory.  After all input has been processed every connection still
 * open is closed, which may write further summary records and dump files.
 */
int
main(int argc, char *argv[])
{
	char *fname;
	char *end;
	unsigned long rate;
	int x;
	struct tcpconn *c;
	unsigned done_something;

	if (argc < 2)
		errx(1, "usage: %s drop_rate [pcap_file ...]",
		     argc > 0 ? argv[0] : "prog");

	/* Reject anything that is not a positive number fitting an
	   unsigned: a rate of zero would be used as a divisor later. */
	rate = strtoul(argv[1], &end, 10);
	packet_drop_rate = (unsigned)rate;
	if (end == argv[1] || *end != '\0' ||
	    rate == 0 || packet_drop_rate != rate)
		errx(1, "invalid drop rate '%s'", argv[1]);

	argv++;
	argc--;
	summary_log = fopen("summary", "w");
	if (!summary_log)
		err(1, "summary");

	if (argc > 1) {
		for (x = 1; x < argc; x++)
			process_file(argv[x]);
	} else {
		do {
			fname = readline(stdin);
			if (fname) {
				process_file(fname);
				free(fname);
			}
		} while (fname);
	}
	fprintf(stderr, "%f %f\n", 1 - (1.0/packet_drop_rate),
		(double)packets_processed / (double)connections_dumped);

	/* Close whatever is still open.  Closing a connection frees it and
	   its companion (which may sit in another bucket), so only the
	   first entry of each bucket is taken per pass and the table is
	   swept again until a pass finds nothing. */
	do {
		done_something = 0;
		for (x = 0; x < 65536; x++) {
			c = hash_table[x];
			if (c) {
				close_connection(c);
				done_something = 1;
			}
		}
	} while (done_something);

	if (fclose(summary_log) != 0)
		err(1, "summary");

	return 0;
}
