#! /usr/bin/env python
###############################################################################
#                                                                             #
#   Copyright 2005 University of Cambridge Computer Laboratory.               #
#                                                                             #
#   This file is part of Nprobe.                                              #
#                                                                             #
#   Nprobe is free software; you can redistribute it and/or modify            #
#   it under the terms of the GNU General Public License as published by      #
#   the Free Software Foundation; either version 2 of the License, or         #
#   (at your option) any later version.                                       #
#                                                                             #
#   Nprobe is distributed in the hope that it will be useful,                 #
#   but WITHOUT ANY WARRANTY; without even the implied warranty of            #
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the             #
#   GNU General Public License for more details.                              #
#                                                                             #
#   You should have received a copy of the GNU General Public License         #
#   along with Nprobe; if not, write to the Free Software                     #
#   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA #
#                                                                             #
###############################################################################


##############################################################################
## 
##
## 
## 
##
## 

############################################################################

import string
import glob
import os
import sys
from sys import argv
import getopt
from signal import *
import atexit
import types

import gc
from resource import getrusage, RUSAGE_SELF
import np_warnings

import Numeric
from traceback import print_stack

from print_col import upline, overprint, whoops

from nprobe import intoa_string, _free, malloc_report
from nprobe import tcp_hdrs, tcp_open
from nprobe import _inet_aton, tcp_open, tcp_hdrs, accept_conn, filter_help
from nprobe import REC_TCP_HTTP, REC_TCP_HTTP_OPEN, REC_TCP_HTTP_HDRS, \
     REC_UDP_DNS, OPEN_BIT, HDRS_BIT
from np_file_util import get_files
from np_http_util import allocate_http_reusable_objects, get_http_rec_and_trans
from np_ns_utils import NSLookup, get_ns_rec
from np_longutil import tv2l
from np_namecache import NameCache
from np_tfilter import TFilter
from np_filerec import FileRec
from np_ectrs import E_Counters
from np_statscollector import get_IPaddrs, NoIPAddrError
from np_treestats import TreeStats, NOB_THRESH

import scratch

############################################################################

def usage(scriptname):
    """Print a one-line usage summary for *scriptname* and exit.

    The program name and the argument description are separated by a
    single space.  Always terminates the program via sys.exit(1).
    """
    # A single parenthesised argument prints exactly as the print
    # statements used elsewhere in this file do.
    print("usage: " + scriptname + " rep-file-list")

    sys.exit(1)

############################################################################

def exitfun():
    """Clean-up hook: close the record grabber and file the name cache.

    Closes np_grabrec.grabobj if it is set, then calls file_cache() on the
    module-level name cache if one has been created (presumably writing the
    cache out - see NameCache).
    """
    # Imported at call time, as the original code does.
    from np_grabrec import grabobj as grabber

    if grabber:
        grabber.close()

    cache = namecache
    if cache:
        cache.file_cache()

#############################################################################

def handle_sigint(n, f):
    """SIGINT handler: report, show the stack, clean up and exit.

    *n* and *f* are the arguments supplied by the signal module when the
    handler is invoked; neither is used.  Prints 'SIGINT' and the current
    stack trace, runs exitfun() and terminates with exit status 0.
    """
    print('SIGINT')
    print_stack()
    exitfun()
    # Same effect as sys.exit(0).
    raise SystemExit(0)

#############################################################################

def do_ns(lookups, connrec, nsrec, cachefn):
    """Record one name-server lookup and cache the addresses it yielded.

    lookups -- dictionary {client: [NSLookup, ...]}, updated in place
    connrec -- connection record; connrec.shost() supplies the client key
    nsrec   -- name-server record wrapper exposing the raw record as .ns_rec
    cachefn -- callable (addr, req) invoked for every resource record in the
               lookup whose code is 1

    Returns None.  Nothing is recorded unless both of the two low-order
    state bits of the record are set (request and response both seen).
    """
    rec = nsrec.ns_rec
    if (rec.state & 0x3) != 0x3:  # both request and response seen?
        return

    cli = connrec.shost()

    lookup = NSLookup(cli, connrec, nsrec)

    if len(lookup.rrlist):
        # only interested if IP_ADDR or CNAME requests
        lookups.setdefault(cli, []).append(lookup)

        # append rr's to our name cache
        for rr in lookup.rrlist:
            if rr.code == 1:
                cachefn(rr.addr, rr.req)

#############################################################################

#
# Null logging function (so that a particular examination doesn't overwrite logs)
#

def write_null(arg):
    """Ignore *arg* and return None - a do-nothing logging function."""
    return None

    

#############################################################################

#
# Convert a comma-separated (command-line) list of wanted hosts into a list

def get_clis_wanted(s):
    """Convert a comma-separated string of hosts into a list.

    Every comma-delimited field of *s* is passed to _inet_aton(); the
    converted values are returned as a list in the order given.
    """
    # str.split replaces the deprecated string.splitfields (same result).
    return [_inet_aton(field) for field in s.split(',')]

#############################################################################

def pre_pass_A(openfilelist, nrecs, spec, fspec, stats):

    def by_1(a, b):
        return b[1] - a[1]

    #
    # To get TCP records into
    #
    connrec, translist = allocate_http_reusable_objects()
    
    nread = 0
    done = 0
    do_del = 0
    rank = 0

    cyes = {}
    call = {}
    syes = {}
    sall = {}

    nconns = 0

    ectrs = stats.ectrs

    if spec == find_delays:
        why = 'finding potential delayed servers'
        do_del = 1
        stats.del_only = 1
    elif type(spec) == types.StringType:
        if spec.find('rank') == 0:
            rank = 1
            spec = spec.replace('rank', '')
            rend = spec[0].upper()
            try:
                nrank = int(spec[1:])
            except ValueError:
                print 'Pre_pass: don\'t understand int arg to', spec
                sys.exit(1)
            why = 'Ranking top %d %s' % (nrank, rend)
    else:
        print 'Pre_pass: don\'t understand spec', spec
        sys.exit(1)
        
        
        

    print 'Pre_pass:', why
    print
    
    for file in openfilelist:

        if done:
            break
        
        if len(openfilelist) > 1:
            print "\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
            print "File %s - %d records" % (file.fnm, file.counters.nrecords)
            print
            sys.stdout.flush()

        while not done:

            if nrecs and nread > nrecs:
                done = 1
                break

            rectype = file.next_type_rec(REC_TCP_HTTP)
                
            if rectype == -1: #EOF
                break

            elif rectype == REC_TCP_HTTP: #TCP/HTTP
                nread = nread + 1
                get_http_rec_and_trans(file, connrec, translist)

                if not accept_conn(connrec.flow_inner, fspec):
                    ectrs.cf_rej += 1
                    continue

                if do_del:
                    ca = connrec.shost()
                    sa = connrec.dhost()
                    call[ca] = 1
                    sall[sa] = 1
                    if spec(connrec):
                        syes[sa] = 1
                        cyes[ca] = 1
                elif rank:
                    nconns += 1
                    if rend == 'S':
                        sa = connrec.dhost()
                        try:
                            syes[sa] += 1
                        except KeyError:
                            syes[sa] = 1
                    elif rend == 'C':
                        ca = connrec.shost()
                        try:
                            syes[ca] += 1
                        except KeyError:
                            syes[ca] = 1
                    else:
                        print 'Pre_pass rank: unknown end', rend
                        sys.exit(1)
                else:
                    print 'Pre_pass: don\'t know spec', spec
                    sys.exit(1)

            else:
                file.advance()
                #print 'Pass1 - wrong record type', rectype
                #sys.exit(1)
                
            if not nread%100:
                overprint('Record %d' % (nread))

    overprint('%d relevant records' % (nread))

    if do_del:
        lsy = len(syes)
        lsa = len(sall)
        lcy = len(cyes)
        lca = len(call)
        s = 'Pre_pass - %d/%d Servers showing possible delays (%d/%d clients)    ' % (lsy, lsa, lcy, lca)
        print s
        stats.write_log(s)

    elif rank:
        nrconns = 0
        servs = syes.items()
        syes = {}
        servs.sort(by_1)
        if rend == 'S':
            d = syes
        else:
            d = cyes
        for s in servs[:nrank]:
            d[s[0]] = 1
            nrconns += s[1]
        s = 'Pre_pass - %d Servers seen (%d/%d conns)    ' % (len(servs),
                                                               nrconns, nconns)
        print s
        stats.write_log(s)

    #syes.update(cyes)
    return (syes, cyes)

#############################################################################

def pass1(openfilelist, stats, nrecs, clis, servs, wantdicts, scwd,
          connid,
          startconn, fspec,
          tfilt, cachefn):

    #
    # A dictionary of HTTP TCP connections in the log {conn_id: filerec}
    #
    obdict = stats.obdict

    #
    # Dictionarie of servers seen - {addr: [conn ids]}
    #
    stats.cdict = clidict = {}
    stats.sdict = servdict = {}
    
    #
    # List of DNS IP_ADDR and CNAME lookups seen
    #
    stats.nslookups = lookups = {}
    
    #
    # Dictionary of tcp connections keyed by conn_id - used to collect headers
    #
    conn_ids = {}
    
    #ectrs = E_Counters()
    ectrs = stats.ectrs

    if wantdicts:
        if wantdicts[0]:
            swd = wantdicts[0]
        else:
            swd = None
        if wantdicts[1]:
            cwd = wantdicts[1]
        else:
            cwd = None
    #
    # To get TCP records into
    #
    tcp_open_rec = tcp_open()
    hdrs = tcp_hdrs(1)
    connrec, translist = allocate_http_reusable_objects()
    
    nread = 0
    done = 0

    print 'Pass 1: collating client records'
    print
    
    for file in openfilelist:

        if done:
            break
        file.reset()

        flen = os.stat(file.fnm)[6]
        
        if len(openfilelist) > 1:
            print "\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
            print "File %s - %d records" % (file.fnm, file.counters.nrecords)
            print
            sys.stdout.flush()

        while not done:

            if nrecs and nread > nrecs:
                break

            rectype = file.find_next_http_and_hdrs_or_ns_record()
            
            offset = file.curr_offset # get before read advances
            #print 'offset %d' % (offset)
            if offset >= flen:
                print 'ERROR bad offset %d - file size %d' % (offset, flen)
                
            if rectype == -1: #EOF
                break

            elif rectype == REC_TCP_HTTP_OPEN: # notification of TCP open
                tcp_open_rec.get_open_rec(file)
                ectrs.nconns += 1
                id = tcp_open_rec.conn_id
                saddr = tcp_open_rec.dhost()
                caddr = tcp_open_rec.shost()

                if id < startconn:
                    continue

                elif not accept_conn(tcp_open_rec.flow, fspec):
                    ectrs.cf_rej += 1
                    continue
                
                if connid:
                    if id == connid:
                        conn_ids[id] = []
                        print 'Got conn #%d' % (connid)
                        print
                    else:
                        ectrs.id_rej += 1
                
                elif clis != None:
                    caddr = tcp_open_rec.shost()
                    if clis.count(caddr):
                        conn_ids[id] = [] #set up for list of headers
                    else:
                        ectrs.id_rej += 1
                
                elif servs != None:
                    if servs.count(saddr):
                        conn_ids[id] = [] #set up for list of headers
                    else:
                        ectrs.id_rej += 1

                elif wantdicts:
                    if swd:
                        if swd.has_key(saddr):
                            if cwd:
                                if cwd.has_key(caddr):
                                    conn_ids[id] = []
                                else:
                                    ectrs.id_rej += 1
                            else:
                                conn_ids[id] = []
                        else:
                            ectrs.id_rej += 1
                    else:
                        conn_ids[id] = []

                elif scwd:
                    try:
                        w = scwd[saddr][caddr]
                        conn_ids[id] = []
                    except KeyError:
                        ectrs.id_rej += 1
                else:
                    conn_ids[id] = []


            elif rectype == REC_TCP_HTTP_HDRS: # a block of hdrs
                hdrs.get_hdrs_p(file)
                conn_id = hdrs.conn_id
                if conn_ids.has_key(conn_id):
                    conn_ids[conn_id].append((file.indx,
                                              REC_TCP_HTTP_HDRS, file, offset))

            elif rectype == REC_TCP_HTTP: #TCP/HTTP
                nread = nread + 1
                get_http_rec_and_trans(file, connrec, translist)
                # get the conn_id
                conn_id = connrec.get_conn_id()
                #print '#', conn_id
                
##                 # only want where both directions seen
##                 if not (connrec.server_seen() and connrec.client_seen()):
##                     if conn_ids.has_key(conn_id):
##                         # remove dictionary item
##                         del conn_ids[conn_id]
##                         ectrs.ow_rej += 1
##                     continue

                if conn_ids.has_key(conn_id): # one we're interested in
                    if connid and connid == conn_id:
                        done = 1
                    if tfilt.pss(connrec, translist): # one we're interested in
                        # find any headers already dumped
                        hdrlist = conn_ids[conn_id]
                        hdrlist.append((file.indx, REC_TCP_HTTP, file, offset))

                        scratch.conns_accepted += 1

                        # add a connection filerec to obdict
                        conn = FileRec('TCPConn', hdrlist)
                        #conn.add(hdrlist)
                        obdict[conn_id] = conn

                        # add conn to clidict
                        cli = connrec.shost()
                        try:
                            c = clidict[cli]
                        except KeyError:
                            c = clidict[cli] = []
                        c.append(conn_id)

                        # add conn to servdict
                        serv = connrec.dhost()
                        try:
                            s = servdict[serv]
                        except KeyError:
                            s = servdict[serv] = []
                        s.append(conn_id)

                    else:
                        # not interested
                        ectrs.tf_rej += 1
                    # remove dictionary item
                    del conn_ids[conn_id]

            elif rectype == REC_UDP_DNS: #UDP_DNS
                #
                # XX TMP XX
                #
                file.advance()
                continue
                #indx, uconnrec, nsrec = get_ns_rec(file)
                #do_ns(lookups, uconnrec, nsrec, cachefn)

            else:
                print '%s Illicit record type encountered %d' % (scriptname,
                                                                 rectype)
            if not nread%100:
                overprint('Record %d' % (nread))
##                 sys.stdout.write('.')
##                 sys.stdout.flush()

    overprint('%d relevant records' % (nread))

    print
    return ectrs

############################################################################

def find_delays(cr):
    """Return the result of cr.has_delay() for the connection record *cr*."""
    delayed = cr.has_delay()
    return delayed

############################################################################

#
# Module-level handles, assigned in main().  They are global so that
# code outside main() - exitfun() reads namecache - can reach them.
#
stats = None
namecache = None
	
def main():
    """Command-line entry point: select connections and model client pages.

    Parses the options, opens the log files named by the remaining
    arguments, optionally runs a pre-pass (pre_pass_A) or loads saved
    delayed-server data, runs pass1() to index the wanted connections and
    then either draws the reconstructed clients (-d) or builds reference
    trees for every client and reports the collected statistics.

    Options:
      -h        print usage and exit
      -v        accepted, no effect (host names are always enabled on the
                name cache)
      -V dir    use name-cache host names for the second lookup function;
                dir is the name-cache directory ('.' = log directory)
      -d        draw trees (implies -t; the tree log goes to /dev/null)
      -t        trace
      -q        quiet
      -x arg    do not save data (the argument is ignored)
      -r n      record limit passed to the passes
      -s n      ignore connections with an id below n
      -c id     build trees only for this connection id
      -C list   build trees only for these clients (comma separated)
      -S list   build trees only for these servers (comma separated)
      -m        report memory use
      -O        passed to TreeStats as rec_obs=1
      -D p|s|ss pre-pass for delays | use saved delayed servers |
                use saved delayed servers and clients
      -R spec   ranking pre-pass, see pre_pass_A ('rank' + spec)
      -F n|h    connection filter specification, or 'h' for help
      -T spec|h transaction filter specification, or 'h' for help
      -N n      nob_thresh value
    """

    global namecache, stats

    from traceback import print_exc

    scriptname = os.path.basename(argv[0])
    draw_tree = 0
    nrecs = 0
    fspec = 0
    trace = 0
    verb = 0
    quiet = 0
    rec_obs = 0
    nob_thresh = NOB_THRESH
    tfilt = TFilter()
    clis = None
    servs = None
    cachedir = None
    connid = None
    startconn = 0
    wantfn = None
    use_saved_delayed_servers = 0
    use_saved_delayed_servers_and_clients = 0
    savedata = 1
    report_mem = 0
    write_err = sys.stderr.write

    try:
        optlist, args = getopt.getopt(sys.argv[1:],
                                      'hvV:dtc:s:C:S:r:F:T:D:R:qN:x:mO')

    except getopt.error, s:
        print '%s: %s' % (scriptname, s)
        usage(scriptname)
        sys.exit(1)

    # Option text that becomes part of the output file names
    optstr = ''

    for opt, val in optlist:
        if opt == "-h":
            usage(scriptname)
        elif opt == "-v":
            # NOTE(review): this used to call an undefined use_hostnames()
            # and so raised NameError.  namecache.use_hostnames() is called
            # unconditionally below, so the flag is accepted as a no-op -
            # confirm that nothing more was intended.
            pass
        elif opt == "-V":
            verb = 1
            if len(val):
                cachedir = val
        elif opt == "-d":
            draw_tree = 1
            trace = 1
        elif opt == "-q":
            quiet = 1
            optstr += '-q'
        elif opt == "-t":
            trace = 1
        elif opt == "-x":
            savedata = 0
        elif opt == "-r":
            nrecs = int(val)
            optstr = optstr + '-r' + val
        elif opt == '-C':
            clis = get_clis_wanted(val)
            optstr = optstr + '-C' + val
            print 'building trees only for client(s) ',
            print val
        elif opt == '-S':
            servs = get_clis_wanted(val)
            optstr = optstr + '-S' + val
            print 'building trees only for server(s) ',
            print val
        elif opt == '-c':
            connid = int(val)
            optstr = optstr + '-c' + val
            print 'building trees only for connid ',
            print val
        elif opt == "-m":
            report_mem += 1
        elif opt == "-s":
            startconn = int(val)
            optstr = optstr + '-s' + val
        elif opt == "-O":
            rec_obs = 1
        elif opt == "-D":
            if val == 'p':
                wantfn = find_delays
            elif val == 's':
                use_saved_delayed_servers = 1
            elif val == 'ss':
                use_saved_delayed_servers_and_clients = 1
            else:
                print '-D: don\'t understand arg', val
                usage(scriptname)
            optstr = optstr + '-D' + val
        elif opt == "-R":
            wantfn = 'rank' + val
            optstr = optstr + '-R' + val
        elif opt == "-F":
            if val == 'h' or val == 'H':
                filter_help()
                sys.exit(0)
            fspec = int(val)
            optstr = optstr + '-F' + val
        elif opt == "-T":
            if val == 'h' or val == 'H':
                tfilt.help()
                sys.exit(0)
            tfilt.build_filters(val)
            optstr = optstr + '-T' + val
        elif opt == '-N':
            try:
                nob_thresh = int(val)
            except ValueError:
                print 'Can\'t understand -N (nob_thresh) argument', val
                usage(scriptname)
            optstr = optstr + '-N' + val

    if optstr and optstr[0] == '-':
        optstr = optstr[1:]

    signal(SIGINT, handle_sigint)

    openfilelist, counters, basepath = get_files(args)
    basedir = os.path.dirname(basepath) + '/'
    run_start = tv2l(counters.run_start())

    if cachedir == '.' or cachedir is None:
        cachedir = basedir
    namecache = NameCache(cachedir, load=1)
    namecache.use_hostnames()
    lookup = namecache.hostname
    cachefn = namecache.add2cache

    if verb:
        lookup2 = lookup
    else:
        lookup2 = intoa_string

    atexit.register(exitfun)

    filepath = basepath + 'counters'
    counters.printself_tofile(filepath, '')
    del counters

    basep = basepath + optstr
    if os.path.basename(basep):
        basep += '.'
    basepath = basep + 'NOBT%d' % (nob_thresh)

    if draw_tree:
        logpath = '/dev/null'
    else:
        logpath = (basepath + '_treelog')

    thresh = gc.get_threshold()
    print 'gc thresh =', thresh
    gc.set_threshold(350, 5, 5)

    #
    # get a stats collector
    #
    # The invocation is recorded as '<opt><value> ' for each option followed
    # by '<arg> ' for each remaining argument.
    invoke_args = ''.join([o + v + ' ' for o, v in optlist]
                          + ['%s ' % (a) for a in args])

    stats = TreeStats(invoke_args, quiet=quiet, savedata=savedata,
                      logpath=logpath, rec_obs=rec_obs)
    stats.basepath = basep
    stats.basedir = basedir
    stats.lookup = lookup
    stats.lookup2 = lookup2
    stats.run_start = run_start
    write_log = stats.write_log

    stats.ectrs = E_Counters()
    stats.tfilt = tfilt
    stats.load_server_assoc()
    stats.nob_thresh = nob_thresh
    stats.open_savefiles()

    if wantfn:
        wantdicts = pre_pass_A(openfilelist, nrecs, wantfn, fspec, stats)
        print 'Using %d pre_passed delayed servers (%d clients)' % (len(wantdicts[0]), len(wantdicts[1]))
        lwd = len(wantdicts[0])
        strict_wantdict = None
        # the connection filter is not applied again in pass 1
        fspec = 0
    elif use_saved_delayed_servers:
        wantdicts = stats.get_delayed_servers()
        strict_wantdict = None
        print 'Using %d pre_saved delayed servers (%d clients)' % (len(wantdicts[0]), len(wantdicts[1]))
        lwd = len(wantdicts[0])
    elif use_saved_delayed_servers_and_clients:
        wantdicts = None
        strict_wantdict = stats.load_delayed_servers()
        print 'Using %d pre_saved delayed servers with clients' % (len(strict_wantdict))
        lwd = len(strict_wantdict)
    else:
        wantdicts = None
        strict_wantdict = None

    if wantdicts and wantdicts[0]:
        stats.add_server_assoc(wantdicts[0])
        print 'Added %d associated servers' % (len(wantdicts[0]) - lwd)

    if strict_wantdict:
        stats.add_strict_server_assoc(strict_wantdict)
        print 'Added %d associated servers' % (len(strict_wantdict) - lwd)

    #
    # Run through the logs, apply filters, build per client list of file references
    #
    pass1(openfilelist, stats, nrecs, clis, servs, wantdicts,
          strict_wantdict, connid, startconn, fspec, tfilt, cachefn)
    obdict = stats.obdict
    cdict = stats.cdict
    obdict['filepath'] = basepath
    del wantdicts

    # One WebClient file record per client, built from the record sets of
    # all of that client's connections
    refs = [FileRec('WebClient', [obdict[c].recsets for c in cli])
            for cli in cdict.values()]

    print 'x', len(stats.sdict), 'servers'
    print 'x', len(stats.cdict), 'clients'
    print 'x', len(refs), 'refs'
    if quiet:
        del stats.sdict
        del stats.cdict
        del cdict

    stats.nclients = nrefs = len(refs)

    if report_mem:
        malloc_report()
        print

    #
    # Just fire up a window, construct clients from file records and display
    #
    if draw_tree:
        from np_TScreen import Tscreen
        print 'Pass 2: reconstructing clients prior to drawing'
        stats.ectrs.printself(tfilt)
        i = 1
        tlist = []
        lookups = stats.nslookups
        for c in refs:
            overprint('Constructing client %d of %d' % (i, nrefs))
            C = c.reconstruct(lookup=lookup)
            try:
                C.lookups = lookups[C.addr]
            except KeyError:
                # no NS lookups from this client
                pass
            tlist.append(C)
            i += 1
        Tscreen(tlist, lookup=lookup2, standalone='yes',
                path=basepath, trace=trace)

    #
    # Construct clients from file records, make trees, log significant findings
    #

    else:
        sys.stdout.flush()
        sys.stderr.write('Pass 2: reconstructing %d clients and building reference trees' % (nrefs))
        print
        for i in range(len(refs)):
            c = refs[i]
            overprint('Constructing client %d of %d - %d pages' % (i,
                                                      nrefs, stats.npages))
            # Reset for every client so that a failed reconstruct() can
            # never leave the previous client's object in C.
            C = None
            try:
                C = c.reconstruct(lookup=lookup, logfun=write_log, trace=trace)
                C.make_tree(tmbase=run_start, logfun=write_log, trace=trace)
                stats.get_data(C)
            except (KeyboardInterrupt, SystemExit):
                # The SIGINT handler exits via SystemExit; it must not be
                # treated as a modelling failure and swallowed.
                raise
            except:
                # Deliberately broad: one bad client must not stop the run.
                if C is not None:
                    who = C.addr_str()
                else:
                    who = '<unreconstructed ref %d>' % (i)
                msg = 'WebClient #%s page modelling failure:' % (who)
                whoops(msg)
                stats.write_log(msg)
                if trace:
                    # NOTE(review): replaces a call to an undefined
                    # inform(); showing the traceback when tracing is
                    # assumed to be an acceptable substitute.
                    print_exc()
            if C is not None:
                C.tidy_up()
            if not i % 100 and report_mem:
                sys.stdout.flush()
                write_err('Client %d of %d - %d pages\n' % (i, nrefs,
                                                          stats.npages))
                write_err('gc: ')
                write_err('len garbage list %d\n' % (len(gc.garbage)))
                malloc_report()
                del gc.garbage[:]

                write_err('\n\n')
            if quiet:
                # drop the file record as soon as its client is done
                refs[i] = None
        sys.stdout.flush()
        sys.stderr.write('%d clients constructed' % (nrefs))
        print

        print
        print 'conns_accepted', scratch.conns_accepted
        print 'conn_recons', scratch.conn_recons
        print 'conns_intrees', scratch.conns_intrees

        stats.results()
	    
##############################################################################


# Call main when run as script
if __name__ == '__main__':
    # Executed as a program rather than imported: run the tool.
    main()


