###############################################################################
#                                                                             #
#   Copyright 2005 University of Cambridge Computer Laboratory.               #
#                                                                             #
#   This file is part of Nprobe.                                              #
#                                                                             #
#   Nprobe is free software; you can redistribute it and/or modify            #
#   it under the terms of the GNU General Public License as published by      #
#   the Free Software Foundation; either version 2 of the License, or         #
#   (at your option) any later version.                                       #
#                                                                             #
#   Nprobe is distributed in the hope that it will be useful,                 #
#   but WITHOUT ANY WARRANTY; without even the implied warranty of            #
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the             #
#   GNU General Public License for more details.                              #
#                                                                             #
#   You should have received a copy of the GNU General Public License         #
#   along with Nprobe; if not, write to the Free Software                     #
#   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA #
#                                                                             #
###############################################################################


from __future__ import generators

from print_col import *

#from urlparse import *
from nprobe import DUMMY_VALUE, LR_TS, LR_SCRIPTED, LR_ARCHIVE, LR_INLINE, \
     LR_LINK, LR_UNKNOWN, LR_REL_BASE, LR_END_BASE, LR_REDIRECT, \
     LR_SCRIPT_ARCHIVE, LR_SCRIPTED_LINK, LR_SCRIPTED_INLINE, \
     LR_SCRIPTED_UNKNOWN, LR_REL_SCRIPT_BASE, LR_END_SCRIPT_BASE, \
     LR_REDIRECT_INLINE, LR_REDIRECT_LINK, LR_REFRESH_URL, LR_REFRESH_SELF, \
     LR_REFRESH, LR_HAS_URL
from nprobe import TCP_CLIENT_SEEN, TCP_SERVER_SEEN
from nprobe import TCP_SERV_SYN, TCP_CLI_SYN, TCP_SERV_HTTP, SERVER, CLIENT, \
     TRANS_VAL, TRANS_DUMMY_UNSYNCH, TRANS_DUMMY_ERR, TRANS_ERR, \
     TRANS_INCOMPLETE, TRANS_FINISHED
from nprobe import TSP_SYN, TSP_DUP_SYN, TSP_FIN, TSP_ACKSYN, TSP_RST, \
     CT_NON_OBJECT, CT_DUMMY

from nprobe import linkrec
from nprobe import intoa_string, _free

from  np_longutil import *
from np_TCPConn import D_DUPCSYN, D_DUPFREQ, D_CRETRANS, D_NOT_CONNECTED, \
     D_REQNOTSEEN, D_REPNOTSEEN
from np_HTTPTrans import LinkRec, DummyTransaction
#from np_TCP import *
from print_col import *

from np_bitmaps import short_dash, short_dash_inv, med_dash, med_dash_inv

import sys
import scratch

##############################################################################
##############################################################################

#
# Help for TCP
#

# TCP header flag bits used by this module, in ascending bit order
TH_FIN = 0x01
TH_SYN = 0x02
TH_RST = 0x04

##############################################################################
##############################################################################

#
# Event types - ordering (for some) is important as used for sort
#


# The numeric values matter: for some of these the relative order is used
# as a sort key, so do not renumber without checking the sort users.

# name-service events (NOTE(review): presumably lookup open/close - confirm)
EV_NSOP = 10
EV_NSCLO = 20

# connection establishment
EV_CONNOP = 30
EV_CONN_CPKT = 40
EV_CONN_CSYN = 45
EV_CONN_SPKT = 50
EV_CONN_SSYN = 55
EV_CONN_CACKSYN = 60
EV_CONN_SACKSYN = 70

# request / response progress
EV_REQBEGIN = 80
EV_REQSTART = 90
EV_REQEND = 100
EV_REPSTART = 110
EV_REPEND = 120
EV_DELREPEND = 125
EV_REQFINISH = 130

# connection tear-down
EV_CONN_CFIN = 140
EV_CONN_SFIN = 150
EV_CONN_CRST = 160
EV_CONN_SRST = 170
EV_CONNCLO = 180

#############################################################################
#############################################################################

#
# Link types for non-parsed types (others defined in parse_object.h)
# - must all be > 0xffff
#

DUM = 0x10000  # dummy link

# dummy/referrer links are additionally marked as inline or link
D_INLINE = DUM | LR_INLINE
D_LINK = DUM | LR_LINK

D_DTRANS_LINK = 0x20000  # link to unseen claimed referrer dummy transaction
D_DTRANS_INLINE = 0x40000  # inline counterpart of D_DTRANS_LINK

D_FAIL = 0x80000  # dummy link for failed transaction

LR_MULTI = 0x100000  # multiple fetch
LR_INLINE_MULTI = LR_MULTI | LR_INLINE

LR_POSS_REVISIT = 0x200000
LR_REVISIT = LR_POSS_REVISIT | LR_LINK

LR_FAIL = DUM | D_FAIL
LR_DUMMY_INLINE = DUM | D_INLINE
LR_DUMMY_LINK = DUM | D_LINK

# dummy link from failed transaction
LR_FOLLOW_ON = DUM | LR_INLINE | D_FAIL

# Links from dummy trans marked as inline, as always followed
LR_REFERRER_INLINE = D_DTRANS_INLINE | D_INLINE
LR_REFERRER_LINK = D_DTRANS_LINK

# Links to page roots
LR_ROOT = LR_LINK | LR_REFRESH | D_DTRANS_LINK

# Links which may indicate new container object
LR_NEW_CONT = LR_ROOT | LR_REDIRECT

#############################################################################
#############################################################################

#
# Cut-off periods for auto redirect/inline fetches
# - ARBITRARY
#

# NOTE(review): values are presumably microseconds (10s / 60s / 60s / 2s / 10s)
# - confirm against the timestamp units used by the log reader.
AUTO_REDIRECT_MAX_PER = 10000000
INLINE_MAX_PER = 60000000
INLINE_MAX_NOCONN = 60000000
MIN_LINK_TM = 2000000  # not feasible link click in less
MAX_CONT_INLINE_TM = 10000000
#############################################################################
#############################################################################


# Gathers all data relevant to a particular observed client/server pair

class WebHosts:
    
    def __init__(self, hostaddr, rooturl=None):

	self.Class = 'WebHosts'
	self.addr = hostaddr
        self.id = hostaddr
	self.iserver = 0

        # if rooturl distinguish ref tree with this root
        self.rooturl = rooturl

        # attachment point for NameCache
        #self.nslookup = lookup

        # lists populated during log scan
	self.connlist = []
        self.translist = []
	self.lookups = []

        self.rootlist = [] # page root objects
        self.unlinkedlist = [] # objects not linked into any tree
        self.refreshlist = [] # objects refreshed - not linked into trees
        self.unreslist = [] # s'be empty
        self.invalidlist = [] # invalid objects
        self.revisitlist = [] # revisited objects - not linked into trees

        self.obtree = None
        #self.obrec = FileRec(self.Class)
        self.tree_made = 0

        #np_glob.start = min(np_glob.start, self.start)

##     def __del__(self):
##         print 'Freeing WebClient'
##         for t in self.translist:
##             if t.linkstrlen:
##                 print 'freeing links'
##                 _free(t.linkstr)
##         print

#############################################################################

    def f_null(self, arg):

        pass

############################################################################# 

    def init_tree(self):

        # data structures populated when tree building
        self.nslist = [] # relevant dns lookups
        self.obdict = {} # objects seen
        self.linkdict = {} # global list of inline links of all types
	self.implied_obs = [] # dummy objects
        self.dummytrans = []
        self.dummylinks = []
        self.eventlist = []
        self.celist = []
        self.uconns = [] # not connected connections

	self.retcodedict = {}
	self.ctypedict = {}
	self.stypedict = {}
	self.uagentsdict = {}
	self.sagentsdict = {}
	self.errsdict = {}

        self.uagentcode = 0

        self.nlinks = 0
        self.nnolinks = 0
        self.trans_failed = 0
        self.trans_invalid = 0

#############################################################################

    def addr_str(self):
        """Return both addresses of self.addr as text, joined by '/'."""
        pair = (intoa_string(self.addr[0]), intoa_string(self.addr[1]))
        return '%s/%s' % pair

#############################################################################

    def host_str(self):
        """Return the first address of self.addr (the client) as text."""
        client = self.addr[0]
        return intoa_string(client)

############################################################################# 

    def make_tree(self, caller=None, tmbase=None, logfun=None, trace=0):
        
        if self.tree_made and trace == 0:
            # all done already
            return
        else:
            # either not done, or want it again with the cogs showing
            if self.tree_made:
                self.unmake_tree()
            self.tree_made = 1

        #print 'make_tree'
        #print_stack()

        if logfun == None:
            self.logfun = self.f_null
        else:
            self.logfun = logfun

        if trace:
            self.LinkDict = LinkDict()
            
        # just to record all seen
        self.logfun('WebClient #%s Seen:' \
                      % (self.addr_str()))
            
        self.caller = caller
        self.tmbase = tmbase
        self.trace = trace
        self.deps_trace = self.trace
        self.init_tree()
	self.order_connlist()
	self.build_nslist()
	self.adjust_tm_offsets()
	self.order_translist()
        self.scan_trans()
	self.build_event_list()
        self.find_curr_conns()
        self.find_unlinked()
        if trace and 0:
            for t in self.find_trees(): # just to show us
                pass
        if trace and 1:
            self.process_trees()


        scratch.conns_intrees += len(self.connlist)

        if trace and 0:
            print 'Opfr'
            for cop in self.opfr:
                i = 0
                for v in cop:
                    print i, v
                    i += 1
                print

############################################################################# 

    def unmake_tree(self):

        for c in self.connlist:
            c.lookup = None
            c.ns_attached = None

        del(self.nslist)

        for n in self.obdict.values():
            # includes dummy nodes
            del(n)
        del(self.obdict)

        for v in self.linkdict.values():
            for l in v[1]:
                del(l)
        del(self.linkdict)
        
        del(self.implied_obs)

        for t in self.dummytrans:
            del(t)
        del(self.dummytrans)

        for l in self.dummylinks:
            del(l)
        del(self.dummylinks)
        
        del(self.eventlist)
        del(self.celist)
        del(self.retcodedict)
        del(self.ctypedict)
        del(self.stypedict)
        del(self.uagentsdict)
        del(self.sagentsdict)
        del(self.errsdict)

#############################################################################

    def tidy_up(self):

        # free links bufs
        
        for cc in self.translist:
            if cc.linkstrlen:
                _free(cc.linkstr)

                
## ##                 l = cc.reflink
##                 if l:
##                     if l.target:
##                         del l.target
##                     l.target = None
##                     del cc.reflink
## ##                     del cc.ob
 ##            for ent in C.linkdict.values():
##                 for link in ent[1]:
##                     del link.target
##                     del link.trans

#############################################################################


    #
    # Order the connection list, find earliest open and latest close and delays
    #
    # **Idempotent**
    #

    def order_connlist(self):

        def sf(a, b):
            return a.id - b.id

        trace = 1 and self.trace

	tconns = self.connlist
	tconns.sort(sf)
	self.start = self.abstart = tconns[0].abstart

	i = 0
	end = tconns[0].abclose
	for c in tconns:
	    c.indx = i
	    c.ns_attached = 0
	    i += 1
            end = max(end, c.abclose)
                

	self.end = end

#############################################################################


    #
    # Order the links dictionary transaction lists 
    #

    def order_linksdicts(self):

        def by_tm():
            return int(a.tm - b.tm)

        for l in self.linkdict.values():
            l[1].sort(by_tm)

        for ob in self.obdict.values():
            if ob.is_referrer:
                for l in ob.ldict.values():
                   l[1].sort(by_tm) 

#############################################################################

    #
    # Main loop through all the transactions seen
    #

    def scan_trans(self):

        # for sorting rootlist - abs() because dummy trans orders are < 0
        def by_req(a, b):
            return abs(a.order) - abs(b.order)

        trace = 1 & self.trace
        if trace:
            print 'Scanning trans for client', self.addr_str()

        obdict = self.obdict
        ctypedict = self.ctypedict
        stypedict = self.stypedict
        uagentsdict = self.uagentsdict
        sagentsdict = self.sagentsdict
        errsdict = self.errsdict
        retcodedict = self.retcodedict

        nerrs = 0
        
        n = 1 # start at 1 so any dummy for trans can be differentuated by neg.

        last_trans = None
        curr_referrer = None

        for t in self.translist:

            t.order = n
            n += 1
            t.reflink = None
            t.downlinks = []
            host = t.host

            absurl = t.absurl
            
            #
            # Make node dictionary entry for each container object seen
            #

            if obdict.has_key(absurl):
                node = obdict[absurl]
                if node.isdummy:
                    # replace with a real one
                    dummy = node
                    node = Obnode(absurl)
                    obdict[absurl] = node
                    node.dummy = dummy
                t.ob = node
                t.iscontainer = 1
                    
            elif t.iscontainer:
                node = Obnode(absurl)
                obdict[absurl] = node
                t.ob = node

            else:
                t.ob = None

                
            # record any links in object
            got_redirect = t.get_links()

            if t.linkstrlen:
                node.reqs.append(t)


            #
            # Maintain dictionaries of agents, object types, etc
            #
            if t.cvalid:
                obtype = t.cobtype
                uagent = t.uagent
                err = t.cerr

                # maintain integer mapping of user agents for quick comparison
                try:
                    ua = uagentsdict[uagent]
                except KeyError:
                    ua = [self.uagentcode, 0]
                    uagentsdict[uagent] = ua
                    self.uagentcode += 1

                ua[1] += 1
                t.uac = ua[0]

                if err:
                    if not errsdict.has_key(err):
                        errsdict[err] = 1
                    else:
                        errsdict[err] += 1

            else:
                obtype = CT_NON_OBJECT
                t.uac = -1

            if not ctypedict.has_key(obtype):
                ctypedict[obtype] = 1
            else:
                ctypedict[obtype] += 1


            need_redirect = 0

            if t.svalid:
                obtype = t.sobtype
                retcode = t.retcode
                server = t.server
                err = t.serr

                if server == None:
                    server = 'Unknown'
                if not sagentsdict.has_key(server):
                    sagentsdict[server] = 1
                else:
                    sagentsdict[server] += 1

                if err:
                    if not errsdict.has_key(err):
                        errsdict[err] = 1
                    else:
                        errsdict[err] += 1

                if not retcodedict.has_key(retcode):
                    retcodedict[retcode] = 1
                else:
                    retcodedict[retcode] += 1

                if retcode/100 == 3 and retcode != 304:
                    need_redirect = 1
                    self.logfun('WebClient #%s Redirection:' % (self.addr_str()))
                    #print 'need redirect',
                    #if got_redirect:
                        #print ' got it'
                    #else:
                        #print 'not got'
                    #print '%d got_redirect = %x' % (t.order, got_redirect)

            else:
                obtype = CT_NON_OBJECT

            if not stypedict.has_key(obtype):
                stypedict[obtype] = 1
            else:
                stypedict[obtype] += 1

            if not t.cvalid:
                self.invalidlist.append(t)
                continue

            if need_redirect and not got_redirect:
                t.isfail = 1
                str = 'WebClient #%s No redirect URL: (%d)' % \
                      (self.addr_str(), t.order)
                self.logfun(str)
                if trace:
                    inform(str)
            if need_redirect and got_redirect:
                t.isfail = 0 # over-rides finished/complete as measure of succes
            # fit into reference tree
            t.multi = 0
            t.fon = 0
            self.find_deps(t)
            reflink = t.reflink
            if not reflink and last_trans and t.order < len(self.translist):
                # can we infer from previous?
                if (last_trans.referrer == self.translist[t.order].referrer \
                    or self.translist[t.order].referrer == t.absurl) \
                   and last_trans.reflink:
                    t.referrer = last_trans.reflink.trans.absurl
                    if trace:
                        print 'trying again with referrer', t.referrer
                    self.find_deps(t)
                    
            if reflink:
                self.nlinks += 1
                #t.reflink.trans.downlinks.append(t)
                if t.isfail:
                  self.register_fail(t, trace)
                        
            else:
                self.nnolinks +=1

            if t.isfail:
                self.trans_failed += 1
            #
            # Is this a persistent connection *with* multiple fetches
            #
            if t.connorder == 1:
                #t.TConn.persist = 1
                str = 'WebClient #%s Persistent connection(s):' % \
                      (self.addr_str())
                self.logfun(str)
                if trace:
                    inform(str)

            #
            # Is this a root etc
            #
            if reflink == None:
                    self.unlinkedlist.append(t)
            else:
                type = reflink.type
                if type & LR_ROOT:
                    if type & LR_REFRESH:
                        self.refreshlist.append(t)
                    else:
                        self.rootlist.append(t)
                elif not type & LR_INLINE:
                    self.unreslist.append(t)

            #
            # Check for late in-line fetches not descendent of current
            # container - result of reloads for back button?
            #
            if reflink:
                ltype = reflink.type
                reftrans = reflink.trans
                if not curr_referrer:
                    # first
                    curr_referrer = reftrans
                    if trace:
                        print 'new referrer', curr_referrer.order
                elif reftrans.isimplied:
                    if trace:
                        print 'new referrer', curr_referrer.order
                    curr_referrer = reftrans
                elif reftrans == last_trans:
                    
                    if not last_trans.reflink and last_trans.iscontainer:
                        curr_referrer = last_trans
                        if trace:
                            print 'new referrer', curr_referrer.order
                    else:
                        last_trans.reflink.type &= ~LR_POSS_REVISIT
                        if last_trans.reflink.type & LR_NEW_CONT:
                            curr_referrer = last_trans
                            if trace:
                                print 'new referrer', curr_referrer.order
                        
             ##    elif reftrans != curr_referrer \
##                          and not t.iscontainer \
##                          and not reftrans.isimplied \
##                          and not ltype & LR_REDIRECT:
##                     reflink.set_type(LR_REVISIT)
##                     self.revisitlist.append(t)
##                     str = 'WebClient #%s Link -> revisit - not current referrer:' % \
##                           (self.addr_str())
##                     self.logfun(str)
##                     if trace:
##                         inform(str)

                #
                # Infeasably long fetch times - arbitrary period
                #  ? result of reload - assume re-visit
                #
                try:
                    deadper = t.TConn.open - reftrans.downlinks[-1].TConn.close
                    #llink = reftrans.downlinks[-1].reflink.type
                    #print t.TConn.open, t.reflink.trans.downlinks[-1].TConn.close
                    #print 'conntm', deadper
                except (IndexError, AttributeError):
                    deadper = 0
                    #llink = 0x0
                #print 'deadper=', deadper/1000, 'reqstart=', t.reqstart/1000, 'reflink tm=', reflink.tm/1000
                #if llink & LR_LINK \
                   #or \
##                 if (deadper > INLINE_MAX_NOCONN and t.reqstart - reflink.tm > INLINE_MAX_PER) \
##                    or (not deadper \
##                        and t.reqstart - reflink.tm > INLINE_MAX_PER):
                #if deadper > INLINE_MAX_NOCONN:
                    # assume this is a revisit
                    #print 'revisit?', t.order
                    #if not (t.reflink.type & LR_LINK):
                if (deadper > INLINE_MAX_NOCONN
                    or t.reqstart - reflink.tm > INLINE_MAX_PER) \
                    and (ltype & LR_INLINE):
                    #reflink.type = LR_REVISIT
                    #if not t in self.revisitlist:
                        #self.revisitlist.append(t)
##                         # print 'yes'
                    str = 'WebClient #%s Link -> revisit - time:' % \
                          (self.addr_str())
                    self.logfun(str)
                    if trace:
                        inform(str)
                        
                    refnode = reflink.trans.ob
                    if refnode: 
                        if t.absurl in refnode.ldict:
                            links = refnode.ldict[t.absurl]
                        else:
                            links = None
                        dt = self.make_dummy_trans(t, reflink.trans.absurl,
                                           links, refnode)
                        reflink.target = dt
                        dt.reflink = reflink
                        reflink.type = (LR_REVISIT | LR_LINK)
                        reftrans.downlinks.append(dt)
                        reftrans = dt
                        self.rootlist.append(dt)
                    else:
                        str = 'WebClient #%s Link -> no reflink.trans.ob:' % \
                              (self.addr_str())
                        self.logfun(str)
                        if trace:
                            inform(str)
                        
                reftrans.downlinks.append(t)
            if reflink and not (ltype & LR_POSS_REVISIT):       
                last_trans = t


            
        # add in dummy transactions for inlines - must be roots
        for d in self.dummytrans:
            for t in d.downlinks:
                if t.reflink.type & LR_INLINE:
                    self.rootlist.append(d)
                    break

        # have any unlinked or refreshed now become roots?
        ull = self.unlinkedlist
        for i in range(len(ull)):
            t = ull[i]
            if t.downlinks or t.reflink:
                ull[i] = None
                self.rootlist.append(t)
        self.unlinkedlist = [t for t in ull if t != None]
        
        rl = self.refreshlist
        for i in range(len(rl)):
            t = rl[i]
            if t.downlinks:
                rl[i] = None
                self.rootlist.append(t)
        self.refreshlist = [t for t in rl if t]
        
        il = self.invalidlist
        for i in range(len(il)):
            t = il[i]
            if t.downlinks:
                il[i] = None
                self.rootlist.append(t)
        self.invalidlist = [t for t in il if t]
                
        self.rootlist.sort(by_req)
        

#############################################################################

    #
    # To allow repeat requests to be linked from failures add an entry to
    #  the failed tran's downlink subsidiary links (where find_dep for the
    #  repeat will find it)
    #  - additional field [3] gives failed trans
    #

    def register_fail(self, trans, trace):
        
        l = trans.reflink

        if l:
            #print 'fail link from %d type %x' % (l.trans.order, l.type)

            # always insert sub at original fail - follow links back
            while l and l.type == LR_FOLLOW_ON and l.trans.reflink:
                l = l.trans.reflink
                #print 'fail link now from %d type %x' % (l.trans.order, l.type)
            i = 0
            #print l.subs
            if not len(l.subs):
                l.subs.append([trans.repend, LR_FOLLOW_ON, None, trans])
                if trace:
                    print '%d inserting fail at sub0' % (trans.order)
            else:
                # skip over any subs already followed, then insert this
                for s in l.subs:
                    if (not s[2]) and (not s[1] == LR_FOLLOW_ON):
                        #no target = not followed
                        break
                    i += 1
                # insert before first non-followed sub link
                l.subs.insert(i, [trans.repend, LR_FOLLOW_ON, None, trans])
                if trace:
                    print '%d inserting fail at sub%d' % (trans.order, i)

            str = 'WebClient #%s Fail registered: (%d)' % \
                  (self.addr_str(), trans.order)
            self.logfun(str)
            if trace:
                inform(str)

        
            

            # XXX TODO - get this picked up as dependency for any re-fetch
#############################################################################

    def build_nslist(self):

        #print self.lookups
        #raw_input('...')

        # **idempotent**

        trace = 1 and self.trace

        # connlist must be sorted before calling

	def cmp_lookup_by_reqtm(a, b):
            #print 'cmp_lookup_by_reqtm', a.absreqtm, b.absreqtm
	    return int(a.absreqtm - b.absreqtm)

	if not len(self.lookups):
	    return
        
	tconns = self.connlist
	start = self.start

	#debugging stuff
	sdict = {}
	for conn in tconns:
	    if not sdict.has_key(conn.server):
		sdict[conn.server] = 0
	addrs = sdict.keys()
	addrs.sort()
	#for s in addrs:
	    #print '%s ' % (nprobe.intoa_string(s))

	sdict = {}
	lookupstart = self.lookups[0].absreqtm
	lastm = self.lookups[0].absreptm
	for l in self.lookups:
	    #print l.absreptm
	    if l.absreptm < lastm:
                str = 'WebClient #%s Lookups not ordered: last %d this %d' % \
		      (self.addr_str(), int(lastm - lookupstart), 
		       int(l.absreptm - lookupstart))
                if trace:
                    whoops(str)
                self.logfun(str)
		#sys.exit(1)
	    lastm = l.absreptm
	    for r in l.rrlist:
		if r.code == 1: 
		    if not sdict.has_key(r.addr):
			sdict[r.addr] = 0
	#for s in addrs:
	    #print '%s: ' % (nprobe.intoa_string(s))

	# the real meat
	nslist = self.nslist
	self.lookups.sort(cmp_lookup_by_reqtm)
	for l in self.lookups:
            #print 'NSL'
            #raw_input('...')
	    thisconn = None
	    for r in l.rrlist:
		if r.code == 1:
		    tm = l.absreptm
		    for conn in tconns:
			if r.addr == conn.server and tm < conn.abstart and not conn.ns_attached:
			    thisconn = conn
			    break
	    if thisconn:
		thisconn.lookup = l # not idempotent
		thisconn.ns_attached = 1 # not idempotent
		nslist.append(l)
		#print 'attaching lookup %s to conn #%d %s' % (nprobe.intoa_string(r.addr), thisconn.indx, nprobe.intoa_string(thisconn.server))
		if l.absreqtm < start:
		    start = l.absreqtm

	self.start = start

#############################################################################

#
# Adjust all timings to offsets from when Client first entered our ken,
# biased by value of self.tmbase (e.g. to relate all trees to common start)
#

    def adjust_tm_offsets(self):

        # **idempotent**

	# self.start = absolute first connection open or ns lookup for this
        # client
        
        # bias for common start time if given, else all relative to self.start
        if self.tmbase == None:
            self.tmbase = self.start
            
	self.start -= self.tmbase
        self.end -= self.tmbase
        start = self.tmbase

	for conn in self.connlist:
	    conn.adjust_tm_offsets(start)
         
        #print 'adjust_tm_offsets: translist'  
	for req in self.translist:
	    # relative to open -> relative to start
            req.TConnopen = open = req.abtmbase-start 
	    req.reqstart = open + req.relreqstart
            #print req.reqstart
	    req.reqend = open + req.relreqend
	    repstart = req.repstart = open + req.relrepstart
	    req.repend = req.delrepend = open + req.relrepend

	for look in self.nslist:
	    # absolute -> relative to start
	    look.reqtm = look.absreqtm - start
	    look.reptm = look.absreptm - start

        #
        # Patch for incorrect pers conn repends
        #
        try:
            for c in self.connlist:
                tl = c.translist
                if len(tl) > 1:
                    #print c.id
                    ti = 1
                    tr = tl[ti]
                    ltr = tl[0]
                    rqs = tr.reqstart
                    lastm = ltr.repend

                    for p in c.spktlist:
                        if p.len:
                            tm = p.tm
                            if rqs < tm:
                                #print ltr.repend/1000, '->', lastm/1000
                                ltr.repend = req.delrepend = lastm
                                ti += 1
                                if ti == len(tl):
                                    break
                                ltr = tr
                                tr = tl[ti]
                                rqs = tr.reqstart
                            lastm = tm
        except AttributeError, es:
            # catch inconsistencies
            
                str = 'TCPConn #%d repend adj: %s' % \
		      (c.id, es)
                if self.trace:
                    whoops(str)
                self.logfun(str)


#############################################################################
	    
    #
    # now that request/response timings have been made absolute can 
    # order the request lists
    #

    def order_translist(self):
        """Sort self.translist into time order and sanity-check its extent.

        A transaction is ordered by its request start time (reqstart) when
        its client side is valid (cvalid), otherwise by its response start
        time (repstart).  The list is sorted in place.

        After sorting, two consistency checks are made; a failure is only
        reported (through whoops() when tracing, and always through
        self.logfun), it is never raised:
          - the first transaction's reqstart precedes the open time of the
            first connection in self.connlist;
          - the last transaction's repend is later than self.end.
        """

        def start_tm(trans):
            # A key function is used rather than a cmp function: the result
            # is the same ordering for valid transactions, but it cannot
            # leave a comparison result unset for a pair of invalid
            # transactions, and it does not depend on a time difference
            # fitting into a plain int.
            if trans.cvalid:
                return trans.reqstart
            if not trans.svalid:
                # neither side valid - report, but still let the sort finish
                print('Goof - two invalid operands to sort_translist_by_reqtm')
            return trans.repstart

        trace = self.trace

        self.translist.sort(key=start_tm)

        if not self.translist:
            # nothing to check
            return

        first = self.translist[0]
        last = self.translist[-1]

        # sanity - first request should not precede the first connection
        if first.reqstart < self.connlist[0].open:
            msg = 'WebClient #%s First request (%.3f) before first connection (%.3f)' % \
                  (self.addr_str(),
                   first.reqstart/1000.0,
                   self.connlist[0].open/1000.0)
            if trace:
                whoops(msg)
            self.logfun(msg)

        # sanity - last response should not run past the client's end time
        # (the message reports the owning connection's close time)
        if last.repend > self.end:
            msg = 'WebClient #%s WHOOPS last request after connection: (%.3f) (%.3f)' \
                  % (self.addr_str(),
                     last.repend/1000.0,
                     last.TConn.close/1000.0)
            if trace:
                whoops(msg)
            self.logfun(msg)


#############################################################################

# 
# Build ordered list of significant events 

# - establishes ordering of events and allows time sequence gaps if wanted
#
# **idempotent**
#
    def build_event_list(self):

	def ev_cmp(a, b):
	    if a[0] > b[0]:
		return 1
	    elif a[0] < b[0]:
		return -1;
	    else:
		return a[1] - b[1]

	trace = 1 and self.trace
        
	reqs = self.translist
	conns = self.connlist
	lookups = self.nslist

	eventlist = self.eventlist
	mindur = 0x7FFFFFFFFFFFFFFFL
	maxdur = 0

	latestend = 0L
	for req in reqs:
	    cvalid = req.cvalid
	    svalid = req.svalid
	    if cvalid:
		reqstart = req.reqstart
		reqend = req.reqend
                delrepend = req.delrepend #
		reqdur = req.reqdur = int(reqend - reqstart)
		reqbegin = reqstart
		if not svalid:
		    reqfinish = reqend
		eventlist.append((reqstart, EV_REQSTART, req))
		eventlist.append((reqend, EV_REQEND, req))
		if reqend > latestend:
		    latestend = reqend
		if 0 < reqdur < mindur:
		    mindur = reqdur
	    else:
		req.reqdur = 0
	    if svalid:
		repstart = req.repstart
		repend = req.repend
		repdur = req.repdur = int(repend - repstart)
		if not cvalid:
		    reqbegin = repstart
		reqfinish = repend
		eventlist.append((repstart, EV_REPSTART, req))
		eventlist.append((repend, EV_REPEND, req))
                if req.repend != req.delrepend:
                    eventlist.append((req.delrepend, EV_DELREPEND, req))
                    repend = delrepend
                    repdur = int(delrepend - repstart)
		if repend > latestend:
		    latestend = repend
		if 0 < repdur < mindur:
		    mindur = repdur
	    else:
		req.repdur = 0

	    # spot case where order of req/rep may be compromised
	    if cvalid and svalid:
		if reqbegin > repstart:
		    reqbegin = repstart
		if reqfinish < reqend:
		    reqfinish = reqend

	    eventlist.append((reqbegin, EV_REQBEGIN, req))
	    eventlist.append((reqfinish, EV_REQFINISH, req))

	latest_connend = 0
	inter_pkt_mindur = 0x7FFFFFFFFFFFFFFFL

        # need distinct list of some connection events
        celist = self.celist
        
	for conn in conns:
	    cflags = conn.cflags
	    sflags = conn.sflags
	    optm = conn.open
	    clotm = conn.close

	    conndur = conn.dur = clotm - optm

	    if cflags & TSP_SYN:
	     celist.append((conn.clisyn, EV_CONN_CSYN, conn))

	    if sflags & TSP_SYN:
		celist.append((conn.servsyn, EV_CONN_SSYN, conn))

	    if cflags & TSP_FIN:
		celist.append((conn.clifin, EV_CONN_CFIN, conn))

	    if sflags & TSP_FIN:
		celist.append((conn.servfin, EV_CONN_SFIN, conn))

	    if cflags & TSP_ACKSYN:
		celist.append((conn.cliacksyn, EV_CONN_CACKSYN, conn))

	    if sflags & TSP_ACKSYN:
		celist.append((conn.servacksyn, EV_CONN_SACKSYN, conn))

	    if cflags & TSP_RST:
		celist.append((conn.clirst, EV_CONN_CRST, conn))

	    if sflags & TSP_RST:
		celist.append((conn.servrst, EV_CONN_SRST, conn))

	    celist.append((optm, EV_CONNOP, conn))
	    celist.append((clotm, EV_CONNCLO, conn))
	    

	    if clotm > latest_connend:
		latest_connend = clotm
	    if 0 < conndur < mindur:
		mindur = conndur

	    if (cflags & TSP_SYN) and (sflags & TSP_SYN) and \
	       0 < abs(conn.servsyn - conn.clisyn) < mindur:
		mindur = abs(conn.servsyn - conn.clisyn)

	    if conndur > maxdur:
		maxdur = conndur

	    #last_pkt_tm =  conn.pktlist[0].tm
	    last_pkt_tm =  conn.open
	    for pkt in conn.pktlist:
		tm = pkt.tm
		interpktdur = tm - last_pkt_tm
		if 0 < interpktdur < inter_pkt_mindur:
		    inter_pkt_mindur = interpktdur
		last_pkt_tm = tm
		#way = ord(pkt.way[0])
                way = pkt.dir
		if way == SERVER:
		    event = EV_CONN_SPKT
		elif way == CLIENT:
		    event = EV_CONN_CPKT
		else:
		    print "GOOF - invalid way %d" % (way)
		    pkt.printself_rel()
		    sys.exit(1)
		eventlist.append((tm, event, pkt))
            del last_pkt_tm

        eventlist.extend(celist)
        #celist.sort(ev_cmp)

	if inter_pkt_mindur < mindur:
	    mindur = inter_pkt_mindur

	## print 'Mindur after pkts %s' % (tsLongstring(mindur))

## 	    print '#%2d %9d - %9d %9d (%9d)' % (conn.id, optm/1000, 
## 						    clotm/1000, 
## 						    (clotm-optm)/1000, 
## 						    latest_connend/1000)

	for ns in lookups:
	    reqtm = ns.reqtm
	    reptm = ns.reptm
	    dur = ns.dur = reptm - reqtm
	    
	    eventlist.append((reqtm, EV_NSOP, ns))
	    eventlist.append((reptm, EV_NSCLO, ns))

	    if 0 < dur < mindur:
		mindur = dur
	    if dur > maxdur:
		maxdur = dur
		
	#eventlist.sort(ev_cmp)
	eventlist.sort()
        
	if eventlist[0][1] != EV_CONNOP \
	   and eventlist[0][1] != EV_NSOP \
	   and eventlist[0][2].cobtype != CT_UNSYNCH \
	   and eventlist[0][2].cobtype != CT_POST_ERR \
	   and eventlist[0][2].sobtype != CT_UNSYNCH \
	   and eventlist[0][2].sobtype != CT_POST_ERR:
            str = 'WebClient #%s Event list goof:' % (self.addr_str())
            if trace:
                whoops(str)
                eventlist[0][2].printself()
            self.logfun(str)
	    #sys.exit(1)

	self.mindur = mindur
	self.maxdur = maxdur
	self.latest_reqend = latestend
	self.latest_connend = latest_connend


##############################################################################

#
# Establish # conns concurrently open
#
# cop1 spans open -> close (ie very first to very last pkt)
# cop2 spans open -> client/server FIN or client/server RST
# cop3 spans server ACKSYN -> client/server FIN or client/server RST
#
    def find_curr_conns(self):
        """Count concurrently open connections from the events in self.celist.

        self.celist is sorted in place and walked in order, maintaining
        three running counts:
          cop1 - raised at connection open, dropped at connection close;
          cop2 - raised at connection open, dropped at the first client or
                 server FIN/RST, or at close if none was seen;
          cop3 - raised at the server ACKSYN, dropped at the first client
                 or server FIN/RST, or at close if none was seen.

        Results:
          conn.cop1/cop2/cop3 - the counts in force when the connection
                 opened (not including the connection itself);
          conn.close1 - starts as conn.close; lowered to the time of any
                 FIN/RST; if no FIN/RST came before close it becomes
                 conn.sldata if conn.sdpkts, else conn.cldata if
                 conn.cdpkts, else conn.close;
          self.maxcop1/maxcop2/maxcop3 - peak value of each count;
          self.opfr - three lists; opfr[i][n] is the number of connections
                 that opened while count i stood at n (each list starts
                 with ten slots and grows as needed).

        An empty celist yields zero maxima and all-zero opfr lists.
        Exits the process (sys.exit(1)) if the sorted events are found to
        be out of time order.
        """

        evl = self.celist
        evl.sort()
        # three independent lists - [[0]*10]*3 would alias one list thrice
        opfr = [[0] * 10 for _ in range(3)]

        cop1 = 0
        cop2 = 0
        cop3 = 0

        maxcop1 = 0
        maxcop2 = 0
        maxcop3 = 0

        # no events: the loop below simply does not run
        if evl:
            last_tm = evl[0][0]
        else:
            last_tm = 0

        for tm, evtype, conn in evl:

            if last_tm > tm:
                print('sort goof')
                sys.exit(1)
            last_tm = tm

            if evtype == EV_CONNOP: #always see this
                # record what was already open when this one opened
                conn.cop1 = cop1
                conn.cop2 = cop2
                conn.cop3 = cop3

                for hist, n in zip(opfr, (cop1, cop2, cop3)):
                    if n >= len(hist):
                        hist.extend([0] * (n + 1 - len(hist)))
                    hist[n] += 1

                conn.op = 1
                conn.op1 = 1    # still counted in cop2
                conn.op2 = 0    # not yet counted in cop3

                cop1 += 1
                cop2 += 1

                maxcop1 = max(maxcop1, cop1)
                maxcop2 = max(maxcop2, cop2)

                conn.close1 = conn.close

            elif evtype == EV_CONNCLO: # always see this
                cop1 -= 1
                if conn.op1:
                    # no FIN/RST was seen for this connection
                    cop2 -= 1
                    conn.op1 = 0
                    if conn.sdpkts:
                        conn.close1 = conn.sldata
                    elif conn.cdpkts:
                        conn.close1 = conn.cldata
                    else:
                        conn.close1 = conn.close

                if conn.op2:
                    cop3 -= 1
                    conn.op2 = 0

            elif evtype == EV_CONN_SACKSYN:
                cop3 += 1
                conn.op2 = 1
                maxcop3 = max(maxcop3, cop3)

            elif evtype in (EV_CONN_CRST, EV_CONN_SRST,
                            EV_CONN_CFIN, EV_CONN_SFIN):
                # only the first FIN/RST of a connection drops the counts
                if conn.op1:
                    cop2 -= 1
                    conn.op1 = 0
                if conn.op2:
                    cop3 -= 1
                    conn.op2 = 0
                conn.close1 = min(conn.close1, tm)

        self.maxcop1 = maxcop1
        self.maxcop2 = maxcop2
        self.maxcop3 = maxcop3
        self.opfr = opfr


##############################################################################

    # make dummy node, transaction and link for unseen explicit referrer
    def make_dummy_node(self, req, referrer):
        """Create a stand-in node, transaction and link for a referrer.

        A new Obnode for `referrer` is flagged as a dummy and recorded in
        self.obdict and self.implied_obs.  A DummyTransaction starting at
        req.reqstart is attached to it (and recorded in self.dummytrans),
        and a DummyLink from that transaction to `req` is recorded in
        self.dummylinks, in the node's link dictionary under req.absurl,
        and as req.reflink.
        """
        # the placeholder node itself
        node = Obnode(referrer)
        node.isdummy = 1
        self.obdict[referrer] = node
        self.implied_obs.append(node)

        # placeholder fetch of that node, ordered as the negative of req
        dummy = DummyTransaction(req.referrer, node, req.reqstart,
                                 req.connid, req.uac)
        dummy.firstrans = req
        dummy.order = -req.order
        self.dummytrans.append(dummy)
        node.reqs.append(dummy)
        node.dummy = dummy

        # link from the placeholder fetch to the real request
        link = DummyLink(req, req.reqstart, dummy)
        self.dummylinks.append(link)
        node.ldict[req.absurl] = [link.type, [link]]
        req.reflink = link


##############################################################################

    # make a dummy transaction for seen node
    def make_dummy_trans(self, req, referrer, links, refnode):
        """Attach a new dummy transaction for `referrer` to an existing node.

        The DummyTransaction starts at req.reqstart, is appended to
        self.dummytrans and refnode.reqs, and is linked to `req` by a new
        DummyLink (recorded in self.dummylinks and as req.reflink).  The
        link is appended to `links` when that list is non-empty; otherwise
        a fresh entry is stored in refnode.ldict under req.absurl.

        Returns the new dummy transaction.
        """
        if self.deps_trace:
            print('- making timely dummy >%d uac %d start %d' %
                  (req.order, req.uac, req.reqstart/1000))

        dummy = DummyTransaction(referrer, refnode, req.reqstart, req.connid,
                                 req.uac)
        dummy.firstrans = req
        dummy.order = -req.order
        dummy.uac = req.uac
        self.dummytrans.append(dummy)
        refnode.reqs.append(dummy)

        link = DummyLink(req, req.reqstart, dummy)
        self.dummylinks.append(link)
        if not links:
            # no usable link list supplied - start a new dictionary entry
            refnode.ldict[req.absurl] = [link.type, [link]]
        else:
            links.append(link)
        req.reflink = link

        return dummy

##############################################################################

    #
    # Find data dependencies for transactions
    #
    # Have to make some assumptions:
    # - if a referring object given this is `the truth' - unless redirected
    # - in-line objects are fetched following first unfollowed instance of a
    #   link to them
    # - other objects are fetched following last instance of link to them
    # - objects referred in-line and as followable links are got as in-line
    #
		    
    def find_deps(self, req):

        # make dummy link for dummy node
        def make_dummy_link(self, req, node):
            #dl = DummyLink(req, req.reqstart, node.dummy)
            #dl = DummyLink(req, req.reqstart, node.reqs[-1])
            trans = node.reqs[-1]
            if trans.isimplied:
                ltm = trans.repend
            else:
                ltm = (trans.repend - trans.repstart)/2
            dl = DummyLink(req, ltm, trans)
            #print 'trans=', trans.order, 'tm=', ltm
            self.dummylinks.append(dl)
            #node.reqs.append(req)
            node.ldict[req.absurl] = [dl.type, [dl]]
            req.reflink = (dl)
            if trace:
                print '- added link to dummy >%d' %\
                      (-node.reqs[-1].order)

        #move dummy link time forward
        def make_link_earlier(link, req):
            # Re-point an existing dummy link (and the dummy transaction it
            # hangs off) at `req`: the link time and the transaction's
            # response start both become req.reqstart, the transaction takes
            # the negative of req's order, and req.reflink is set to the link.
            if trace:
                print('- modifying dummy >%d time' %
                      (-link.trans.order))
            link.tm = req.reqstart
            link.target = req
            link.trans.repstart = req.reqstart
            # The other places that create dummy transactions all record
            # the originating request as `firstrans`; update that attribute
            # so it does not go stale.
            link.trans.firstrans = req
            # NOTE(review): the misspelt `firsttrans` was what this helper
            # used to set - kept in case anything reads it; drop once
            # confirmed unused.
            link.trans.firsttrans = req
            link.trans.order = -req.order
            req.reflink = link

        # Provide link missing from referrer's links - first time link needed
        def insert_referrer_link(self, refnode, req):

            def by_tm(a, b):
                return int(a.tm-b.tm)

            ll = None # poss guide link
            gotone = 0
            #ft = refnode.reqs[0]
            #print 'reqstart', req.reqstart
            #for ft in refnode.reqs:
                #print ft.order, ft.repstart
            for ft in refnode.reqs:
                #print ft.order, ft.repstart, req.reqstart
                if ft.repstart > req.reqstart:
                    break
                if ft.uac == req.uac:
                    gotone = 1
            if gotone:
                if trace:
                    print '- likely transaction (%d) found' % (ft.order)
                # get ordered list of links in referrer for first fetch
                llist = []
                for l in refnode.ldict.values():
                    for ls in l[1]:
                        if ls.trans == ft:
                            llist.append(ls)
                if len(llist):
                    if trace:
                        print '- possible links found'
                    llist.sort(by_tm)
                    # find latest link giving rise to transaction earlier than this
                    for l in llist:
                        if l.trans.order < req.order:
                            ll = l
                if ll:
                    if trace:
                        print '- found guide link'
                    tm = ll.tm
                    ft = ll.trans ##
                else:
                    if trace:
                        print '- link not found - using repstart time'    
                    tm = ft.repstart
            else:
                tm = req.TConnopen
                if trace:
                    print '- no appropriate referrer fetch found, making dummy trans .%d' % (req.order)
                ft = DummyTransaction(refnode.absurl, refnode, tm, req.connid,
                                      req.uac)
                ft.firstrans = req
                ft.order = -req.order
                self.dummytrans.append(ft)
                ft.obnode = refnode
                refnode.dummy = ft
                refnode.reqs.insert(0, ft)

            # make a link
            dl = DummyLink(req, tm, ft)
            self.dummylinks.append(dl)
            refnode.ldict[req.absurl] = [dl.type, [dl]]
            req.reflink = dl
            if trace:
                print 'referrer link inserted'
                print dl

        def add_referrer_link(req, reftrans, links, type):
            #
            #  Provide link missing from referrer's links - link known to exist
            #  but not picked up for latest fetch of referrer
            #
            dl = DummyLink(req, reftrans.repstart, reftrans, type=type)
            self.dummylinks.append(dl)
            links.append(dl)
            req.reflink = dl
            if trace:
                print 'Adding missing link type %x to latest referrer fetch (%d)' \
                      % (dl.type, reftrans.order)



        #  NOT USED
        # give current transaction link from last for an object
        def grab_link(l, req, trace):
            tl = l.trans.downlinks
            tl.reverse()
            for t in tl:
                if t.ob == req.ob:
                    break
            tl.remove(t)
            tl.reverse()
            t.reflink = None
            req.reflink = l
            l.target = req
            if trace:
                print '- moved link from %d to %d' % (t.order, req.order)

        # NOT USED
        # object fetched but all links already followed - link to last fetch
        #  as a follow on
        def add_follow_on(self, l, req, trace, links):
            #print 'add_follow_on'
            if l.target.absurl != req.absurl:
                print 'OUCH %s != %s' % (l.target.absurl, req.absurl)
            
            dl = DummyLink(req, l.target.reqend, l.target, type= LR_FOLLOW_ON)
            self.dummylinks.append(dl)
            print l.trans.ob
            
            try:
                l.trans.ob.ldict[req.absurl][1].append(dl)
                print 'dict OK'
            except KeyError:
                print 'FOO Key error'
            except IndexError:
                print 'FOO Index error'
            except:
                print 'FOO other error'
            req.reflink = dl
            if trace:
                print '- added follower link from %d to %d' % (l.target.order, req.order)
                
                        
        # find earliest followed in-line link in links list
        def find_inline(req, links):
            for l in links:
                if l.tm > req.reqstart: # now too late
                    return
                if (not l.target) and (l.type & LR_INLINE):
                    l.target = req
                    req.reflink = l
                    if trace:
                        print '- in-line found (%d)' % (l.trans.order)
                    return

        # find latest followed link in links list
        def find_last_inline(req, links, refnode):
            ll = None
            for l in links:
                if l.tm > req.reqstart: # now too late
                    if trace:
                        print 'find_last_inline - balance of links too late'
                    break
                #print l
                if (l.type & (LR_INLINE | LR_FOLLOW_ON)) \
                   and (not l.target) and l.trans.uac == req.uac:
                    #print 'yes'
                    ll = l
            if ll:
                if not refnode:
                    #refnode = links[0].trans.ob
                    reftrans = ll.trans
                    refnode = reftrans.ob
                else:
                    reftrans = refnode.reqs[-1]
                #print 'll is'
                #print ll
                #print 'reftrans is', reftrans.order
                if ll.trans != reftrans:
                    add_referrer_link(req, reftrans, links, ll.type)
                    #if trace:
                        #raw_input('...')
                    #raw_input('...')
                else:
                    if trace:
                        print '- last inline found (%d)' % (ll.trans.order)
                    ll.target = req
                    req.reflink = ll
                if req.iscontainer and ll.trans.downlinks \
                   and req.reqstart - ll.trans.downlinks[-1].reqend > MAX_CONT_INLINE_TM:
                    ll.type &= ~LR_INLINE
                    ll.type |= LR_LINK
                    if trace:
                        print 'link converted to LINK'
            elif trace:
                print 'find_last_inline - no link found'
        
        # find earliest subsidiary of last in-line link in links list
        def find_inline_sub(self, req, links):
            if trace:
                print '- looking for multiple in-line fetch'
            ll = None
            for l in links:
                if l.tm > req.reqstart: # now too late
                    break
                if (l.type & LR_INLINE):
                    ll = l
            if ll:
                if trace:
                    print 'master found from %d' % (ll.trans.order)
                i = 0
                for sub in ll.subs:
                    if sub[0] > req.reqstart:
                        if trace:
                            print 'sub %d too late' % (i)
                        break
                    if trace:
                        print '- trying sub %d' % (i)
                    if not sub[2]:
                        if trace:
                            print 'got sub %d' % (i)
                        # not been followed 
                        dl = DummyLink(req, sub[0], ll.trans)
                        #dl.type = LR_INLINE_MULTI #sub[1]
                        req.reflink = dl
                        sub[2] = req

                        if sub[1] == LR_FOLLOW_ON:
                            # follow on for a fail
                            dl.type = LR_FOLLOW_ON
                            dl.trans = sub[3]
                            req.fon = 1
                            str = 'WebClient #%s Follow-on: (%d)' % \
                                  (self.addr_str(), req.order)
                            logfun(str)
                            if trace:
                                print '- follow on found (%d->%d)' % \
                                      (sub[3].order, req.order)
                        elif (sub[1] & LR_INLINE):
                            # simple in-line link
                            dl.type = LR_INLINE_MULTI
                            req.multi = 1
                            str = 'WebClient #%s Multiple fetch: trans %d' % \
                                  (self.addr_str(), req.order)
                            logfun(str)
                            if trace:
                                print '- multiple found (%d)' %  (req.order)
                                #
                        break
                    i += 1

        # find latest followed link in links list
        def find_link(req, links):
            ll = None
            for l in links:
                if l.tm > req.reqstart: # now too late
                    break
                if (not l.target) and l.trans.uac == req.uac:
                    ll = l
            if ll:
                req.reflink = ll
                ll.target = req
                if trace:
                    print 'find_link - type %x %s found (%d>)' % \
                          (ll.type, self.LinkDict.link_str(ll.type),
                           ll.trans.order)
            elif trace:
               print 'find_link - no link found' 

        # find latest in-line redirect link
        def find_inline_redirect(req, links):
            ll = None
            reqtm = req.reqstart
            if links[0].tm > reqtm:
                if trace:
                    print '- inline_redirect no timely link exists'
                return None
            for l in links:
                if l.tm > reqtm: # now too late
                    if trace:
                        print '- inline_redirect balance of links too late'
                    break
                    #return None
                if (not l.target) \
                   and l.type == LR_REDIRECT_INLINE:
                    #l.target = req
                    #req.reflink = l
                    if trace:
                        print '- in-line redirect found (%d)' %\
                              (l.trans.order)
                    ll = l
            return ll

                
        # find earliest in-line redirect link - request has referrer
        def find_inline_redirect_withref(req, links):
            reqtm = req.reqstart
            if links[0].tm > reqtm:
                if trace:
                    print '- redirect withref no timely link exists'
                return (None, None, None)
            rref = req.referrer
            la = None
            lc = None
            ls = None
            for l in links:
                if l.tm > reqtm: # now too late
                    if trace:
                        print 'redirect withref balance of links too late'
                    break
                if (not l.target) \
                   and l.type == LR_REDIRECT_INLINE:
                    if not lc and rref == l.trans.referrer:
                        lc = l
                    if not ls and rref == l.trans.absurl:
                        ls = l
                    if not la and not l.trans.referrer:
                        la = l
                if la and lc and ls:
                    break
                
            if trace:
                if la:
                    print '- in-line redirect (noref) found (%d)' % \
                          (la.trans.order)
                if lc:
                    print '- in-line redirect with common referrer found (%d)'\
                          % (lc.trans.order)
                if ls:
                    print '- in-line redirect as referrer found (%d)' % \
                          (ls.trans.order)
                if not (la or lc or ls):
                    print 'redirect withref - nothing found'
            return (la, lc, ls)

        # find latest redirect link in links list
        def find_link_redirect(req, links):
            ll = None
            for l in links:
                if l.tm > req.reqstart: # now too late
                    break
                if l.type == LR_REDIRECT_LINK:
                    ll = l
            if ll:
                #ll.target = req
                #req.reflink = ll
                if trace:
                    print '- redirect link found (%d)' % (ll.trans.order)
            elif trace:
                print '- redirect link none found'
            return ll

        # attempt to infer link in explicit referer by reference to neighbours
        def find_slot_referrer(req, refnode):
            gotit = None
            indx = req.order - 1
            for i in [indx-1, indx+1]:
                if not 0 <= i < len(self.translist):
                    continue
                t = self.translist[i]
                if t.reflink and t.uac == req.uac and t.referrer == req.referrer:
                    gotit = t
                    break

            if gotit:
                if trace:
                    print 'slot referrer found inferred from %d' % (i+1)

                # make a link
                #dl = DummyLink(req, req.reqstart, t.reflink.trans)
                dl = DummyLink(req, t.reflink.tm, t.reflink.trans)
                #dl.type = LR_REFERRER
                self.dummylinks.append(dl)
                refnode.ldict[req.absurl] = [dl.type, [dl]]
                req.reflink = dl

        #
        # FN STARTS HERE
        #
        
        trace = self.deps_trace
        logfun = self.logfun

        ldict = self.linkdict


        reqtm = req.reqstart
        reqobj = req.absurl
        referrer = req.referrer
        req.reflink = None


        if trace:
            print '******************************************************'
            print 'Finding referrer for %d %s uac %d' % (req.order,
                                                         reqobj, req.uac)

        #
        # Is this the result of an in-line re-direction
        # - check first as any referrer field is unlikely to point to
        #   correct transaction

        redlink = None
        try:
            f = ldict[reqobj]
            type = f[0]
            links = f[1]
            #if (type & LR_REDIRECT):
                #print 'redirect for '
            if (type & LR_REDIRECT) and (type & LR_INLINE):
                if trace:
                    print 'possible in-line redirect'
                if referrer:
                    la, lc, ls = find_inline_redirect_withref(req, links)
                    if ls:
                        redlink = ls # redirect is the referrer
                    elif lc:
                        redlink = lc # redirect has same referrer
                    else:
                        redlink = la # redirect has no referrer

                    if not redlink: # referrer claim lied
                        redlink = find_inline_redirect(req, links)
                else:
                    redlink = find_inline_redirect(req, links)

                # does it look like this is inline - 10s is arbitrary
                if redlink and req.reqstart - redlink.tm > 10000000:
                    if (type & LR_LINK):
                        # auto not worked - manual follow
                        if trace:
                            print 'auto redirect too late - looking for link'
                        redlink = find_link_redirect(req, links)
                    else:
                        redlink = None
                        if trace:
                            print 'auto redirect too late'
            else:
                # not inline
                redlink = find_link_redirect(req, links)
        except KeyError:
            pass

        if redlink:
            # got one
            redlink.target = req
            req.reflink = redlink
            if (not req.reflink.trans.reflink) and referrer and referrer != req.reflink.trans.absurl:
                # referrer hasn't a referrer and isn't this one's
                # - see if it can use this one's
                req = req.reflink.trans
                reqtm = req.reqstart
                reqobj = req.absurl
                if trace:
                    print 'attempting to find redirect\'s referrer using borrowed'
                    #raw_input('...')        
            else:
                # done all we can
                return

        # find links matching any referrer field
        if referrer:
            # have we seen the claimed referrer
            try:
                refnode = self.obdict[referrer]
                if trace:
                    if refnode.isdummy:
                        dstr = '(dummy)'
                    else:
                        dstr = ''
                    print '- referring node %s %s found' % (dstr, referrer)
                try:
                    f = refnode.ldict[reqobj]
                    type = f[0]
                    links = f[1]
                    firstlink = links[0]
                    # link there
                    if trace:
                        print 'link entry found'
                    if refnode.isdummy:
                        #
                        # need to check feasability of link from existing
                        # dummy trans
                        #
                        #print 'link uac', links[-1].target.uac
                        #print 'link uac', refnode.reqs[-1], refnode.reqs[-1].uac
                        #print 'req uac', req, req.uac
                        #print links
                        #if req.is_referrer():
                        if req.iscontainer:
                            # make a new timely dummy trans and link
                            #print 'referrer new dummy'
                            self.make_dummy_trans(req, referrer, links, refnode)
                        #elif links[-1].target.uac != req.uac:
                        elif refnode.reqs[-1].uac != req.uac:
                            #print 'uac new dummy'
                            self.make_dummy_trans(req, referrer, links, refnode) 
                        else:
                            #  XXX TODO test to avoid infeasably late linking
                            make_dummy_link(self, req, refnode)
                    elif firstlink.tm > reqtm:
                        if trace:
                            print 'too late', firstlink.tm, reqtm
                            print links
                        # but all fetches too late
                        if firstlink.type & DUM:
                            # earlist occurence already a dummy
                            make_link_earlier(firstlink, req)
                        else:
                            # make a timely dummy transaction
                            self.make_dummy_trans(req, referrer, links, refnode)
                    else:
                        # a candidate fetch exists
                        if (type & LR_INLINE):
                            find_last_inline(req, links, refnode)
                            if not req.reflink:
                                find_inline_sub(self, req, links)
                        else:
                            find_link(req, links)

                        if not req.reflink:
                            str = 'WebClient #%s Referring object links exhausted: %s->%s'% (self.addr_str(), referrer, reqobj)
                            logfun(str)
                            if trace:
                                inform(str)
                            if not req.iscontainer:
                                find_slot_referrer(req, refnode)


                except KeyError:
                    # no link recorded
                    if trace:
                        print 'no link entry found'
                    if refnode.isdummy:
                        #
                        # need to check feasability of link from existing
                        # dummy trans
                        #
                        #if req.is_referrer():
                        if req.iscontainer and \
                        req.reqstart - refnode.reqs[-1].reqstart > MIN_LINK_TM:
                            # make a new timely dummy trans and link
                            self.make_dummy_trans(req, referrer, None,
                                             refnode)
                        elif refnode.reqs[-1].uac != req.uac:
                            if trace:
                                print 'uac new dummy'
                                print refnode.reqs[-1].order,
                                refnode.reqs[-1].uac
                            self.make_dummy_trans(req, referrer, None,
                                             refnode)
                        else:
                            # XXX TODO test to avoid infeasably late linking
                            make_dummy_link(self, req, refnode)
                    else:
                        # link not found in referring object
                        str = 'WebClient #%s Link not found in referring object: %s -> %s (%d)'% (self.addr_str(), referrer, reqobj, req.order)
                        logfun(str)
                        if trace:
                            inform(str)
                            print '- inserting link'
                        insert_referrer_link(self, refnode, req)

            except KeyError:
                # havn't seen referrer - assume fetched but not seen,
                # provide dummy to represent it and assumed fetch
                if trace:
                    print '- claimed referring object %s not seen' % (referrer)
                    print '- making dummy >%d, uac %d' % (req.order, req.uac)
                self.make_dummy_node(req, referrer)

        else:
            # no explicit referrer
            referrer = None
            if trace:
                print '- no explicit referrer'

        if not (referrer and req.reflink):
            # no referrer or link not found in referrer

            if trace:
                print '- trying global links'

            try:
                f = ldict[reqobj]
                type = f[0]
                links = f[1]
            except KeyError:
                links = None
                if trace:
                    print '- no link found'

            if links:
                if links[0].tm > reqtm:
                    if trace:
                        print '- no timely link found'
                else:
                    if referrer and (type & LR_REDIRECT):
                        l = None
                        # has it been redirected?
                        if trace:
                            print '- looking for redirection'

                        if (type & LR_INLINE):
                            l = find_inline_redirect(req, links)
                            # does it look like this is inline - 10s is arbitrary
                            if l and req.reqstart - l.tm > 10000000:
                                if (type & LR_LINK):
                                    # auto not worked - manual follow
                                    l = find_link_redirect(req, links)
                                    if trace:
                                        print 'auto too late, link?'
                                else:
                                    l = None
                                    if trace:
                                        print 'auto too late'
                        else:
                            l = find_link_redirect(req, links)

                        if l:
                            l.target = req
                            req.reflink = l

                    elif ((type & LR_INLINE)):
                        find_last_inline(req, links, None)
                        if not req.reflink:
                            find_inline_sub(self, req, links)
                    else:
                        find_link(req, links)

                    if trace and not req.reflink:
                        print 'global links exhausted'
        
                    
#############################################################################

    #
    # Find unconnected objects
    #

    def find_unlinked(self):
        """Report transactions that are not connected to anything.

        A transaction is reported when it has no referring link, is
        flagged svalid, is not flagged sisdummy and has no downlinks.
        Each one is passed to self.logfun as a one-line message and,
        when tracing is on, also to inform().
        """

        tracing = self.trace

        for trans in self.translist:
            # anything linked from above, not valid, or a dummy is skipped
            if trans.reflink or not trans.svalid or trans.sisdummy:
                continue
            # ... as is anything that has children of its own
            if len(trans.downlinks) != 0:
                continue

            msg = 'WebClient #%s Unlinked object: %s (%d)' % (
                self.addr_str(), trans.absurl, trans.order)
            self.logfun(msg)
            if tracing:
                inform(msg)
                
                    
#############################################################################

                            

    #
    # Print out representation of sub-trees identified
    #

    def show_trees(self):
        """Print an indented outline of the sub-trees identified.

        The heads are every transaction in self.translist without a
        referring link, plus everything in self.dummytrans, ordered by
        request start.  Each line shows the transaction's order and the
        description of the link that led to it ('Root' for a head).
        """

        def reqstart_cmp(first, second):
            # cmp-style comparison on request start time
            return int(first.reqstart - second.reqstart)

        def dump(node, prefix, step):
            # print node, then its children one indent step deeper
            link = node.reflink
            if not link:
                label = 'Root'
            else:
                try:
                    label = linktypes_dict[link.type][1]
                except KeyError:
                    # unrecognised type - use the entry for type 0
                    label = linktypes_dict[0][1]
            print('%s%d %s' % (prefix, node.order, label))
            for child in node.downlinks:
                dump(child, prefix + step, step)

        heads = []
        for trans in self.translist:
            if trans.reflink == None:
                heads.append(trans)
        heads.extend(self.dummytrans)
        heads.sort(reqstart_cmp)

        for head in heads:
            dump(head, '', '    ')
                
                    
#############################################################################

    #
    # How long to get significant pages
    # GENERATOR
    #

    def find_trees(self):
        """Summarise each reference tree headed by a member of self.rootlist.

        GENERATOR.  For every root, in list order, yields the tuple

            (server, root order, objects, bytes, header bytes,
             root request start, duration)

        where duration is the latest reply end found anywhere in the tree
        minus the root's request start.  For a root flagged isimplied the
        server is taken from its firstrans and the root itself is left
        out of the object count.

        Side effect: every transaction visited has its treeorder set to
        the index of the tree it was reached from.
        """

        def follow_tree(node, latest, prefix):
            # Depth-first walk of the sub-tree headed by node.  Returns
            # (objects, bytes, header bytes, latest reply end).
            if trace:
                link = node.reflink
                if not link:
                    label = 'Root'
                else:
                    try:
                        label = linktypes_dict[link.type][1]
                    except KeyError:
                        label = linktypes_dict[0][1]
                print('%sTrans %d %s ->%s end %d'
                      % (prefix, node.order, label, node.ob.absurl,
                         node.repend/1000))

            count = 1
            nbytes = node.nbytes
            hbytes = node.hdrlen
            latest = max(latest, node.repend)
            node.treeorder = tree_no  # label trans with tree
            for child in node.downlinks:
                # children reached over an LR_ROOT link are not followed
                if child.reflink.type & LR_ROOT:
                    continue
                sub_count, sub_bytes, sub_hbytes, sub_latest = \
                    follow_tree(child, latest, prefix + step)
                count += sub_count
                nbytes += sub_bytes
                hbytes += sub_hbytes
                latest = max(sub_latest, latest)

            return (count, nbytes, hbytes, latest)

        #
        # Fn starts here
        #

        trace = self.trace
        step = '   '

        if trace:
            print('\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~')
            print('Trees for client %s:-' % (self.addr_str()))

        tree_no = 0
        for root in self.rootlist:
            nob, nbytes, hbytes, latest = follow_tree(root, 0, '')
            if root.isimplied:
                # dummy transaction - report the server of its first
                # transaction and don't count the dummy itself
                server = root.firstrans.servaddr
                nob -= 1
            else:
                server = root.servaddr
            if trace:
                print('XXXXXXXXXXXXXXXX')
                print('Tree %d Root %d %s Primary server %s \n %d obs %d bytes %d hbytes start %dms end %dms dur %dms'
                      % (tree_no, root.order, root.ob.absurl,
                         intoa_string(server), nob, nbytes, hbytes,
                         root.reqstart/1000, latest/1000,
                         (latest - root.reqstart)/1000))
                print('XXXXXXXXXXXXXXXX')
            tree_no += 1
            yield (server, root.order, nob, nbytes, hbytes, root.reqstart,
                   latest - root.reqstart)

        return
                    
#############################################################################

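    #
    # Walk the reference trees: for each root yield its connection and
    # agent details together with a generator over the tree's transactions
    # GENERATOR
    #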

    def find_trees2(self):
        """Deliver each reference tree as a lazy walk plus root details.

        GENERATOR.  For every root h in self.rootlist yields the tuple

            (walk, h.absurl, sid, h.connid, order, start, ua, sa,
             ltype, h.sobtype)

        walk  - generator over h and every transaction below it, parent
                first; children reached over an LR_ROOT link are skipped
        sid   - h.TConn.server
        order - h.connorder
        start - h.TConn.open when order is 0, otherwise h.reqstart
        ua/sa - h.uagent / h.server
        ltype - type of h's referring link, or 0 if it has none

        When h has no TConn, sid, start, ua and sa are None and order
        is 0.
        """

        def follow_tree(t):
            # GENERATOR - yields t, then the transactions below it
            if trace:
                if t.reflink:
                    # NOTE(review): relies on self.LinkDict being set
                    # elsewhere - confirm
                    ltxt = '%s %d' % (self.LinkDict.link_str(t.reflink.type),
                                      t.reflink.trans.order)
                else:
                    ltxt = 'Root'
                # Trace every transaction visited.  This print used to sit
                # inside the else branch above, so linked transactions were
                # never shown and their ltxt was built for nothing.
                print('%s Trans %d link %s ->(%s) end %d'
                      % (self.addr_str(), t.order, ltxt, t.absurl,
                         t.repend/1000))

            yield t
            for h in t.downlinks:
                if (h.reflink.type & LR_ROOT):
                    continue
                for x in follow_tree(h):
                    yield x

        #
        # Fn starts here
        #

        trace = self.trace
        for h in self.rootlist:
            if h.TConn:
                tc = h.TConn
                order = h.connorder
                if order == 0:
                    # first on its connection - start from connection open
                    start = tc.open
                else:
                    start = h.reqstart
                sid = tc.server
                ua = h.uagent
                sa = h.server
            else:
                start = None
                order = 0
                sid = None
                ua = None
                sa = None
            if h.reflink:
                ltype = h.reflink.type
            else:
                ltype = 0
            yield (follow_tree(h), h.absurl, sid, h.connid, order, start, ua,
                   sa, ltype, h.sobtype)

        return
                    
#############################################################################

    #
    # Collect trans and conns forming tree with given root url
    #

    def get_reftree(self, rooturl):

        def find_root(url):
            for r in self.rootlist:
                if r.absurl == url[0] and r.connid == url[1]:
                    return r

            for r in self.translist:
               if r.absurl == url[0] and r.connid == url[1] \
                       and r.reflink and (r.reflink.type & DUM) \
                       and not r.isimplied:
                    t = r.reflink.trans
                    r.reflink = None
                    self.dummytrans.remove(t)
                    self.rootlist.remove(t)
                    return r
                
            return None

        def descend_tree(t):
            tlist.append(t)
            try:
                conndict[t.TConn] = 1
            except TypeError:
                print 't.TConn is', t.TConn
                raise
            for h in t.downlinks:
                if (h.reflink.type & LR_ROOT):
                    continue
                descend_tree(h)

        tlist = []
        conndict = {}
        r = find_root(rooturl)
        if not r:
            print 'Get_tree: can\'t find root \'%s\' conn %d' % \
                  (rooturl[0], rooturl[1])
            print 'roots available:'
            for r in self.rootlist:
                print r.ob.absurl, r.connid
            raise TreeError, ''

        descend_tree(r)

        return (tlist, conndict.keys())
            

                    
#############################################################################
            
    def process_trees(self):
        """Run an np_treestats.TreeStats collection over this host.

        Builds a TreeStats object, primes it with this host's connections
        grouped by server, its start time and intoa_string as both lookup
        functions, then hands this host to its get_data() method.
        """

        from np_treestats import TreeStats

        stats = TreeStats('', trace=1, quiet=0, savedata=0, logpath=None)

        # connections grouped by server
        by_server = {}
        stats.sdict = by_server
        for conn in self.connlist:
            by_server.setdefault(conn.server, []).append(conn)

        stats.run_start = self.start
        stats.lookup = intoa_string
        stats.lookup2 = intoa_string
        stats.basepath = None

        stats.get_data(self)
        
#############################################################################

    

    #
    # Deliver sorted lists of user agents and server implementations
    #

    def get_agentlists(self):
        """Return sorted lists of user agents and server implementations.

        Returns (alist, slist).  alist holds a (value[1], key) pair for
        every item of self.uagentsdict and slist a (value, key) pair for
        every item of self.sagentsdict, each list sorted ascending.
        NOTE(review): value[1] / value are presumably occurrence counts -
        confirm against the code that fills the dictionaries.
        """

        # Indentation is spaces only: the tab-indented lines that were
        # mixed in here are rejected by python -tt and by Python 3.
        alist = [(val[1], agent) for agent, val in self.uagentsdict.items()]
        slist = [(val, agent) for agent, val in self.sagentsdict.items()]

        alist.sort()
        slist.sort()

        return alist, slist
                    
#############################################################################

    #
    # Deliver sorted lists of transaction errors
    #

    def get_transerrlist(self):
        """Return a sorted list of transaction errors.

        The list holds a (value, key) pair for every item of
        self.errsdict, sorted ascending.
        """

        # Indentation is spaces only: the tab-indented line that was
        # mixed in here is rejected by python -tt and by Python 3.
        errs = [(val, key) for key, val in self.errsdict.items()]
        errs.sort()

        return errs
                    
#############################################################################

    #
    # Deliver sorted lists of server return codes
    #

    def get_retcodelist(self):
        """Return a sorted list of server return codes.

        The list holds the (key, value) items of self.retcodedict,
        sorted ascending.
        """

        # Copy the items into a real list before sorting so that this
        # does not depend on items() itself returning a sortable list.
        # Indentation is spaces only (a tab was mixed in here before).
        codes = list(self.retcodedict.items())
        codes.sort()

        return codes
                        
                    
#############################################################################
#############################################################################

class WebClient(WebHosts):
    """WebHosts specialisation labelled as a client.

    Adds nothing to WebHosts beyond setting Class to 'WebClient' and
    rendering the host address with intoa_string.
    """

    def __init__(self, addr, rooturl=None):
        # base-class set-up first, then label the instance
        WebHosts.__init__(self, addr, rooturl)
        self.Class = 'WebClient'

#############################################################################

    def addr_str(self):
        """Return self.addr formatted by intoa_string."""
        return intoa_string(self.addr)

#############################################################################

    def host_str(self):
        """Return the host name string - the same text as addr_str()."""
        addr = self.addr
        return intoa_string(addr)
                        
                    
#############################################################################
#############################################################################

class WebServer(WebHosts):
    """WebHosts specialisation labelled as a server.

    Adds nothing to WebHosts beyond setting Class to 'WebServer' and
    rendering the host address with intoa_string.
    """

    def __init__(self, addr, rooturl=None):
        # base-class set-up first, then label the instance
        WebHosts.__init__(self, addr, rooturl)
        self.Class = 'WebServer'

#############################################################################

    def addr_str(self):
        """Return self.addr formatted by intoa_string."""
        return intoa_string(self.addr)

#############################################################################

    def host_str(self):
        """Return the host name string - the same text as addr_str()."""
        addr = self.addr
        return intoa_string(addr)
                        
                    
#############################################################################
#############################################################################

class TreeError(Exception):
    """Generic exception for errors in reference tree manipulations.

    Derives from Exception so that it can be raised on interpreters that
    only accept exception classes, and so that generic 'except Exception'
    handlers see it.  The value passed in is kept in self.value, as
    before, and is also what str() of the exception shows.
    """

    def __init__(self, val):

        Exception.__init__(self, val)
        self.value = val

    def __str__(self):

        # __str__ must return a string whatever was passed as the value
        return str(self.value)

                        
                    
#############################################################################
#############################################################################

class DummyLink(LinkRec):
    """Link record made up for a transaction rather than found in an object.

    target is the transaction the link points at, tm the time given to
    the link and trans the transaction it hangs from.  When no (or a
    zero) type is given, the type is LR_REFERRER_LINK if the target is
    flagged iscontainer and LR_REFERRER_INLINE otherwise.
    """

    def __init__(self, target, tm, trans, type=None):

        LinkRec.__init__(self, None)

        if type:
            self.type = type
        # Otherwise assume a followed link if the target is a container
        # XXX TODO - need better test - eg. could be a frame
        elif target.iscontainer:
            self.type = LR_REFERRER_LINK
        else:
            self.type = LR_REFERRER_INLINE
        self.target = target
        self.tm = tm
        self.trans = trans
        self.subs = []


    # Override
    def get_url(self):
        """Return the url for this link: the target's obname."""
        target = self.target
        return target.obname

	
	
#############################################################################
#############################################################################

#
# Represents a web 'object'

class Obnode:
    # Represents a web 'object': one absolute url together with the
    # requests seen for it and the links found in it.
    #
    # The class is indented with spaces only; the tab-indented lines that
    # were mixed in before are rejected by python -tt and by Python 3.

    def __init__(self, absurl):
        self.absurl = absurl
        self.host = ''
        self.reqs = []   # list of Request objects
        self.ldict = {}  # links found in this object
        self.isdummy = 0
        self.is_referrer = 0
        self.dummy = None  # any pre-existing dummy hangs off here

    def printself(self):
        # Print the url, how many requests were seen for it, one line per
        # request and, if set, the host; finish with a blank line.
        #
        # The first line used to be written by two print statements joined
        # by a trailing comma, which put two spaces before 'seen'; the
        # single format string below produces exactly the same text.
        print('%s  seen %d, ' % (self.absurl, len(self.reqs)))
        print('Requests: ')
        for req in self.reqs:
            print('\tTCPConn #%d %s %s %s %s' % (
                req.connid,
                nprobe.http_client_method_string(req.reqtype),
                req.obname,
                nprobe.http_server_objtype_string(req.obtype),
                Longstring(req.reqtm)))
        if len(self.host):
            print('Host: %s' % (self.host))
        print('')

#############################################################################
#############################################################################

#
# Dictionary of link types and how to draw them
#

class LinkDict:
    """Table of link types and how to draw them.

    self.dict is keyed by link type.  Each entry is a list of the form
    [0, description, colour name, dash bitmap]; the entry for type 0 (the
    fallback for unknown types) carries one extra trailing 0.  The
    additional key 'types-seen' starts at 0.
    NOTE(review): the leading 0 of each entry and 'types-seen' look like
    'seen' markers maintained elsewhere - confirm.
    """

    def __init__(self):

        # dash bitmaps (med_dash etc.) are used directly as line styles
        self.dict = {
            # flags have seen link(s)
            'types-seen': 0,

            0: [0, 'Unknown link type', 'red', med_dash, 0],

            # in-line link found in seen referrer
            LR_INLINE:
                [0, 'In-line link', 'blue', short_dash],
            # multiple fetch of object
            LR_INLINE_MULTI:
                [0, 'Multiple fetch', 'purple3', short_dash],
            # followed link found in seen referrer
            LR_LINK:
                [0, 'Followed link', 'blue', med_dash_inv],
            # in-line link from unseen referrer
            LR_REFERRER_INLINE:
                [0, 'Claimed inline link', 'goldenrod1', short_dash_inv],
            # followed link from unseen referrer
            LR_REFERRER_LINK:
                [0, 'Claimed followed link', 'goldenrod1', med_dash],

            # automatic redirection
            LR_REDIRECT_INLINE:
                [0, 'In-line redirect', 'MediumOrchid1', short_dash],
            # followed redirection
            LR_REDIRECT_LINK:
                [0, 'Followed redirect', 'MediumOrchid1', med_dash],
            # ?
            LR_FOLLOW_ON:
                [0, 'Follow-on', 'pink', short_dash],
            LR_REFRESH:
                [0, 'Refresh', 'DarkOrchid1', med_dash],
            LR_REVISIT:
                [0, 'Revisit', 'green', med_dash]
            }

    def link_str(self, ltype):
        """Return the description for ltype, or 'Unknown link type'."""

        if ltype in self.dict:
            return self.dict[ltype][1]

        return 'Unknown link type'

# module-level instance of the table
linkdict = LinkDict()


###########################################################################
###########################################################################
