#! /usr/bin/env python
###############################################################################
#                                                                             #
#   Copyright 2005 University of Cambridge Computer Laboratory.               #
#                                                                             #
#   This file is part of Nprobe.                                              #
#                                                                             #
#   Nprobe is free software; you can redistribute it and/or modify            #
#   it under the terms of the GNU General Public License as published by      #
#   the Free Software Foundation; either version 2 of the License, or         #
#   (at your option) any later version.                                       #
#                                                                             #
#   Nprobe is distributed in the hope that it will be useful,                 #
#   but WITHOUT ANY WARRANTY; without even the implied warranty of            #
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the             #
#   GNU General Public License for more details.                              #
#                                                                             #
#   You should have received a copy of the GNU General Public License         #
#   along with Nprobe; if not, write to the Free Software                     #
#   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA #
#                                                                             #
###############################################################################


#
from __future__ import generators


import sys
import os
import getopt
import commands

from np_file_util import get_files
from nprobe import set_print_full_hostname
from nprobe import tcp_conn, tcp_open, tcp_hdrs, _inet_aton
from  np_longutil import ull2l, tv2l


from nprobe import REC_TCP_ALL, OPEN_BIT, HDRS_BIT, TCP_CLIENT_SEEN, TCP_SERVER_SEEN
from nprobe import REC_TCP_HTTP
from np_ectrs import E_Counters
from np_TCPConn import TCPConn, TCPHdr
from np_filerec import FileRec

from print_col import whoops, overprint, F_RED


############################################################################
#############################################################################


# Record-type tag combining REC_TCP_ALL with HDRS_BIT; TCPGet.get_conns()
# stores it with each header-block entry in a connection's file record list.
REC_TCP_ALL_HDRS = REC_TCP_ALL | HDRS_BIT


############################################################################
#############################################################################

#
# 
#

class TCPGet:

    """Class TCPGet - A class for extracting TCP connection records from
    nprobe rep files (see np_TCPGet main() for an example of use)
    
      Initialisation:
       
         Instantiated with the following optional keyword arguments
           (default value shown):-
           'get_hdrs=0' - read and return per packet data (pkt headers)
           'open_order=0' - return connections ordered by open
             (default is ordered by close)
             N.B. this option may result in less efficient operation
           'min_mem=0' - minimise memory usage when returning connections
             in open-order
             (even less efficient, but may be necessary for large logs)
             
         Initialisation arguments can also be (re)set using the class's
          set_ops() method which takes the above args

       Public methods:
       
         The following three methods are intended to be public:

         set_args() Passes c.l. flags to instantiations of the class by the
         calling script - a list of callers c.l. flags is passed in and those
         not claimed by the class are returned for parsing by the caller
         
         -H This usage
         -v Print (known) full hostnames
         -t Print tracing information
         -q Quiet operation
         -r<n> Read only n records and exit
         -s<n> Start reading at connection #n
         -c<connection list> Return only listed connections
           (connection list = comma-separated list of connection ids)
         -C<client list> Return only connections to listed client(s)
           (client list comma-separated list of dotted decimal addresses)
         -S<server list> Ditto server(s)
         -P<port list> Ditto to/from listed ports
         -l Log extraction notes
         -L<dir> Ditto but in directory dir (default is in rep file directory)

         set_ops() Reset instantiation args (same optional keyword args.)

         get_conns() Return a series of TCPConn objects (see the TCPConn class's
           __init__() and adjust_tm_offsets() methods to see what connection
           data this class makes available. The class is also an entry point to
           much TCP analysis

           N.B. This method is a Python GENERATOR so will be called in the form
             'g = TCPGet(args)
              for conn in g.get_conns():
                get connection data
                ... '

       Examples:

         Examples of use of the class can be found in main() of np_TCPGet.py 
        """

    def __init__(self, get_hdrs=0, open_order=0, min_mem=0, sellist=None):

        self.optstr = 'Hvtr:s:c:qS:C:P:lL:'

        self.trace = 0
        self.quiet = 0
        self.nrecs = 0
        self.startconn = 0
        self.oneconn = None

        self.clis = self.servs = self.ports = None

        self.log = 0
        self.logdir = None
        self.logfn = self.nullf

        self.sellist = sellist

        self.set_ops(get_hdrs, open_order, min_mem)

############################################################################

    def set_ops(self, get_hdrs=0, open_order=0, min_mem=0, sellist=None):

        self.get_opens = 0

        self.get_hdrs = get_hdrs
        if get_hdrs:
            self.get_opens = 1

        self.open_order = open_order
        if open_order:
            self.get_opens = 1

        self.min_mem = min_mem


############################################################################

    def nullf(self, s):
        """Discard s - the log function in force while logging is off."""

        return None


############################################################################

    def writelog(self, s):

        self.logf.write(s + '\n')


############################################################################

    def get_files(self):
        """Open the rep files named in self.logfiles.

        Stores the open file list, the counters and the base path returned
        by the module-level get_files() helper, and records the run start
        time (converted by tv2l) as self.run_start.
        """

        # N.B. the name below resolves to the imported helper, not this method
        files, counters, basepath = get_files(self.logfiles)

        self.openfilelist = files
        self.counters = counters
        self.basepath = basepath
        self.run_start = tv2l(counters.run_start())
############################################################################

    def init_log(self):

        if not self.log:
            return

        logdir, repf = os.path.split(self.basepath)
        
        if not self.logdir:
            self.logdir = logdir
        if self.logdir[-1] != '/':
            self.logdir += '/'

        if repf:
            repf = '.' + repf
        if self.optstr:
            self.optstr += '.'
            
        logfnm = self.logdir + 'tcp_get' + repf + self.optstr + 'log'

        try:
            self.logf = open(logfnm, 'w')
        except IOError, s:
            print 'ERROR', s
            sys.exit(1)

        self.logfn = self.writelog
        
        cmd = 'whoami'
	status, me = commands.getstatusoutput(cmd)
        cmd = 'hostname'
	status, host = commands.getstatusoutput(cmd)
        cmd = 'date'
	status, date = commands.getstatusoutput(cmd)

        self.logfn('Run by %s on %s %s\n' % (me, host, date))
        caller_opts = ''
        for t in self.caller_optlist:
            for c in t:
                caller_opts += c
        self.logfn('Invoked %s %s %s\n' % (self.caller, self.optstr, caller_opts))

        
        #print self.logdir, logfnm
        #sys.exit(0)

############################################################################


    def get_conns(self):

        def want(rec):
            #print nread
            #print 'pass #', connid, '\n'
            ectrs.nconns += 1

            if connid < startconn:
                ectrs.start_rej += 1
                return 0

            ret = 1
            if oneconn != None:
                if oneconn.count(connid):
                    print 'got conn %d' % (connid)
                    ectrs.conn_ids_got += 1
                else:
                    ectrs.id_rej += 1
                    #print 'id_rej'
                    return 0
            if clis and not clis.count(rec.shost()):
                ectrs.host_rej += 1
                return  0
            if servs and not servs.count(rec.dhost()):
                ectrs.host_rej += 1
                return 0
            if ports and not (ports.count(rec.dport()) or ports.count(rec.sport())):
                ectrs.port_rej += 1
                return 0

            return ret

        def get_hdrs(rec, hlist):
            #print 'getting %d hdrs #%d' % (rec.get_nhdrs_held(), connid) 
            nhdr = len(hlist)
            base_tm = ull2l(rec.get_hdrs_abstm())
            for i in range(rec.get_nhdrs_held()):
                hlist.append(TCPHdr(rec, i, base_tm, nhdr))
                nhdr += 1

        def mconn():
            conn = TCPConn(connrec, hlist, trace=self.trace, logfun=self.logfn)
            conn.adjust_tm_offsets(run_start)
            conn.FileRec = fr

            return conn
            

        #
        # Body of method starts here
        #

        
        type = REC_TCP_ALL

        connrec = tcp_conn()
        connrec.tcp_alloc_hdrbuffs()
        getconn = connrec.get_conn_and_advance_p

        if self.get_hdrs:
            hdrec = tcp_hdrs(1)
            gethdrs = hdrec.get_hdrs_p
            type |= (HDRS_BIT | OPEN_BIT)
            hdrs = 1
        else:
            hdrs = 0

        if self.get_opens:
            oprec = tcp_open()
            getopen = oprec.get_open_rec
            type |= OPEN_BIT
            ops = 1
        else:
            ops = 0

        self.get_files()
        self.init_log()

        run_start = self.run_start
    
        ectrs = self.ectrs = E_Counters()
        nrecs = self.nrecs
        oneconn = self.oneconn
        startconn = self.startconn
        clis = self.clis
        servs = self.servs
        ports = self.ports


        opord = self.open_order
        mm = self.min_mem

        mmh = mm and opord

        nread = 0
        done = 0
        opens = []
        
        conn_ids = {}

        for file in self.openfilelist:

            getrec = file.next_type_rec
            flen = os.stat(file.fnm)[6]

            if len(self.openfilelist) > 1 or 1:
                print "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
                print "File %s - %d records" % (file.fnm, file.counters.nrecords)
                print 
                sys.stdout.flush()

            #print done
            while done == 0:
                #print 'loop'
                #print

                if nrecs and nread > nrecs:
                    done = 1
                    break

                rectype = getrec(type)
                #print rectype
                offset = file.curr_offset # get before read advances
                if rectype == -1: #EOF
                    print 'EOF'
                    break

                elif rectype & OPEN_BIT: # notification of TCP open
                    getopen(file)
                    connid = oprec.conn_id
                    if want(oprec):
                        opens.append(connid)
                        conn_ids[connid] = [[], [], None] #set up for list of headers

                    continue

                elif rectype & HDRS_BIT: # a block of hdrs
                    gethdrs(file)
                    connid = hdrec.conn_id
                    try:
                        ent = conn_ids[connid]
                        if not mmh:
                            hlist = ent[0]
                            get_hdrs(hdrec, hlist)
                        ent[1].append((file.indx, REC_TCP_ALL_HDRS, file, offset))
                    except KeyError:
                        print 'XXX'
                        pass

                    continue

                else: # TCP close record
                    nread += 1
                    # get tconn record and its data
                    getconn(file)
                    # get the conn_id
                    connid = connrec.get_conn_id()
                    if ops:
                        try:
                            ent = conn_ids[connid]
                            frecs = ent[1]
                            frecs.append((file.indx, REC_TCP_ALL, file, offset))
                            
                            hlist = ent[0]
                        except KeyError:
                            continue
                    elif want(connrec):
                        frecs = [(file.indx, REC_TCP_ALL, file, offset)]
                        hlist = []
                    else:
                        continue

                if hdrs and not mmh:
                    get_hdrs(connrec, hlist)

                # create Filerec for connection
                fr = FileRec('TCPConn', frecs)
                
                if opord:
                    if connid == opens[0]:
                        del opens[0]
                        yield mconn()
                        while opens:
                            dent = conn_ids[opens[0]]
                            conn = dent[2]
                            if not conn:
                                break
                            #print opens
                            if mm:
                                conn = conn.reconstruct()
                                conn.adjust_tm_offsets(run_start)
                            yield conn
                            del dent
                            del opens[0]
                    else:
                        if mm:
                            conn_ids[connid][2] = fr
                        else:
                            conn_ids[connid][2] = mconn()
                else:
                    yield mconn()
                    if ops:
                        del opens[0]
                        del conn_ids[connid]


                if not nread%100 and not done:
                    overprint('Record %d' % (nread))

            if done:
                break

        overprint('%d relevant close records' % (nread))

        ectrs.printself(None)
        print
        if opord:
            print '%d pending connections encountered but not delivered awaiting ordering gap' % (len(opens))


############################################################################

    def set_args(self, argv, caller_optstr):
        """Parse the command line, claiming the flags this class owns.

          argv          - the caller's full argument list (argv[0] is taken
                          as the caller's name)
          caller_optstr - getopt option string for the caller's own flags

        Returns the list of (flag, value) pairs belonging to the caller,
        in the form produced by getopt, for the caller to interpret.

        Side effects: sets self.caller, self.caller_optlist, self.optlist
        and self.logfiles (the non-flag arguments), applies each TCPGet
        flag to the instance, and replaces self.optstr with a string of the
        selection flags actually given (used when naming the log file).

        Exits the process if a flag letter is claimed by both this class
        and the caller (status 1), if getopt rejects the command line
        (status 1, after printing the class documentation), or after
        printing the documentation for -H (status 0).
        """

        #
        # Aggregate cl list of connections wanted
        #
        def get_vals(s, attr):
            setattr(self, attr, [int(f) for f in s.split(',')])
        #
        # Aggregate cl list of hosts wanted
        #
        def get_hosts_wanted(s, attr):
            setattr(self, attr, [_inet_aton(f) for f in s.split(',')])

        #
        # Body of method starts here
        #

        for c in self.optstr:
            if c in caller_optstr and c != ':':
                print 'TCPGet ERROR: \'%s\' is flag for both TCPGet and calling script' % (c)
                sys.exit(1)

        self.caller = argv[0]
        caller_optlist = self.caller_optlist = []

        try:
            self.optlist, self.logfiles = getopt.getopt(argv[1:], self.optstr + caller_optstr)
        except getopt.error, s:
            print '%s: %s' % (self.caller, s)
            # the class has no usage() method (calling one raised
            # AttributeError here); the class docstring is the usage text,
            # exactly as printed for -H
            print
            print self.__doc__
            sys.exit(1)

        optstr = ''
        print 'TCPGet opts:'
        for opt in self.optlist:
            flag, val = opt[0], opt[1]

            # flags owned by the caller are handed back untouched
            if flag[1] in caller_optstr:
                caller_optlist.append(opt)
                continue

            if flag == "-H":
                print
                print self.__doc__
                sys.exit(0)
            elif flag == "-v":
                set_print_full_hostname(1)
            elif flag == "-t":
                self.trace = 1
            elif flag == "-q":
                self.quiet = 1
                optstr += '-q'
            elif flag == "-r":
                self.nrecs = int(val)
                optstr = optstr + '-r' + val
            elif flag == "-s":
                self.startconn = int(val)
                optstr = optstr + '-s' + val
            elif flag == "-c":
                get_vals(val, 'oneconn')
                optstr = optstr + '-c' + val
                print 'analysing only connection(s) ',
                print self.oneconn
            elif flag == '-C':
                get_hosts_wanted(val, 'clis')
                optstr = optstr + '-C' + val
                print 'returning connections only for client(s) ',
                print val
            elif flag == '-S':
                get_hosts_wanted(val, 'servs')
                optstr = optstr + '-S' + val
                print 'returning connections only for server(s) ',
                print val
            elif flag == '-P':
                get_vals(val, 'ports')
                optstr = optstr + '-P' + val
                print 'returning connections only for port(s) ',
                print val
            elif flag == '-L':
                self.log = 1
                self.logdir = val
            elif flag == '-l':
                self.log = 1

        # from here on optstr records the flags in force, not the getopt spec
        self.optstr = optstr

        return caller_optlist


############################################################################

	    
##############################################################################
##############################################################################

def main():

    """np_TCPGet.py provides the TCPGet class.

    When run as script provides trivial example of the class's use and
      examples of use of the Histogram class

    Usage: np_TCPGet.py [flags] <rep file list>
      Flags:
        -h - This documentation
        -H - documentation on np_TCPGet class
        -T - trace (verbose) mode
        -d - collect connection durations
        -e - collect connection set up durations
        -w - investigate occurence of zero window ads.
        -p - collect client/server payload sizes
        -i - print summary data on each connection
          (if one of d, e, w, p, i flags are not set default is to
            print connection id)
        -o - examine connections in open order (default is close order)
        -m - minimise memory usage if -o set

        ... and flags accepted by the np_TCPGet class

    """

    def nullf(s):

        pass

    from histo import Histogram, HistogramError, HistogramLowRangeError, HistogramHighRangeError
    from np_TCPDisplay import TcpDisplay

    # This is string of c.l. args accepted by this script
    optstr = 'Tdewphiom'

    scriptname = os.path.basename(sys.argv[0])

    # instantiate a connection getter
    getter = TCPGet(get_hdrs=0, open_order=0, min_mem=0)

    # pass flags to getter and collect what's left for us
    optlist = getter.set_args(sys.argv, optstr)

    trace = 0
    do_dur = 0
    do_est = 0
    do_zw = 0
    do_pl = 0
    print_id = 0
    print_conn = 0
    opord = 0
    minmem = 0

    # interpret our flags
    for opt in optlist:

        if opt[0] == '-T':
            trace = 1
            getter.trace = 1
        if opt[0] == '-d':
            do_dur = 1
        if opt[0] == '-e':
            do_est = 1
        if opt[0] == '-w':
            do_zw = 1
        if opt[0] == '-p':
            do_pl = 1
        if opt[0] == '-i':
            print_conn = 1
        if opt[0] == '-o':
            opord = 1
        if opt[0] == '-m':
            minmem = 1
        if opt[0] == '-h':
            print
            print main.__doc__
            sys.exit(0)

    nboth = 0
    no_c = 0
    no_s = 0

    if not (do_dur or do_est or do_zw or do_pl or print_conn):
        print_id = 1

    #
    # c.l. flags may have changed getter parameters - reset
    #
    getter.set_ops(open_order=opord, min_mem=minmem)


    #
    # Set up histograms to collect required data
    #
    if do_dur:
        try:
            #
            # set bounds based on first 5000 samples, save OOB samples for
            # examination
            #
            dh = Histogram(ntrigvals=5000, save_orange=1)
        except HistogramError, s:
            print s, 'Duration histogram'
            return

    if do_est:
        try:
            #
            # set upper bound based on first 5000 samples and lower bound of
            # zero. Raise OOB exceptions
            #
            eh = Histogram(lower=0, ntrigvals=5000, rangeexcept=1)
        except HistogramError, s:
            print s, 'Establishment histogram'
            return
        #
        # set to get hdrs - may want to see what's going on if low range
        # exceptions raised
        #
        getter.set_ops(get_hdrs=1)

    if do_zw:
        try:
            # good idea of what's to be catered for here
            zh = Histogram(lower=0, bucketsz=1, upper=10)
        except HistogramError, s:
            print s, 'ZW histogram'
            return
        getter.set_ops(get_hdrs=1)

    if do_pl:
        try:
            phc = Histogram(lower=0, bucketsz=100, ntrigvals=1000, save_orange=1)
            phs = Histogram(lower=0, bucketsz=100, ntrigvals=1000, save_orange=1)
        except HistogramError, s:
            print s, 'PL histogram'
            return

    #
    # Here's the main loop to get the connections
    #
    for c in getter.get_conns():
        if print_id:
            print '#%d\n' % (c.id)
        if print_conn:
            c.printself_with_pkts()
            #c.printself()

        #
        # Discard conns where both client and server not seen
        #
        flags = c.flags

        if not flags & TCP_CLIENT_SEEN:
            no_c += 1
            continue
        if not flags & TCP_SERVER_SEEN:
            no_s += 1
            continue

        nboth += 1
        if do_dur:
            dh.add(c.dur/1000)
            
        if do_est:
            # have to catch OOB exceptions for this one
            try:
                eh.add((c.cliacksyn - c.clisyn)/1000)
            except HistogramLowRangeError, e:
                print e.val
                # take a look at this connection
                c.printself_with_pkts()
                # or alternatively could visualise it
                #TcpDisplay([c], standalone='yes', path='',logfun=nullf, trace=1)

        if do_zw:
            pl = c.pktlist
            zw = 0
            nzw = 0
            for p in pl:
                if p.window == 0:
                    if not zw:
                        zw = 1
                        nzw += 1
                else:
                    zw = 0
            zh.add(nzw)

        if do_pl:
            phc.add((c.cbytes, c.id))
            phs.add((c.sbytes, c.id))

    print '%d connections' % (nboth)
    print 'No server seen %d' % (no_s)
    print 'No client seen %d' % (no_c)

    #
    # Now look at the results collected
    #

    #draw the first two if done
    if do_dur:
        dh.results(draw=1, title='Connection duration (ms)')
        
    if do_est:
        eh.results(draw=1, title='Connection establishment (ms)')

    # just print out these results
    if do_zw:
        zh.results(printit=1, title='No. window closures')

    # have a different look at some of the data for these
    if do_pl:
        for h, end in [(phc, 'Client'), (phs, 'Server')]:
            print '%s results' % (end)
            d = h.results(draw=1, title='%s Payload' % (end))
            print '%s payloads (bytes):-' % (end)
            print '%d samples' % d['nsamples']
            print 'Min %d, Max %d, Upper histo bound %d - %d above as follows' % (d['min'], d['max'], d['upper'], d['nhigh'])
            for b in d['highs']:
                print '\t#%5d %10d' % (b[1], b[0])
            print
    
	    
##############################################################################
##############################################################################


# Script entry point: run the demonstration driver when executed directly
if __name__ == '__main__':
    main()
    
