###############################################################################
#                                                                             #
#   Copyright 2005 University of Cambridge Computer Laboratory.               #
#                                                                             #
#   This file is part of Nprobe.                                              #
#                                                                             #
#   Nprobe is free software; you can redistribute it and/or modify            #
#   it under the terms of the GNU General Public License as published by      #
#   the Free Software Foundation; either version 2 of the License, or         #
#   (at your option) any later version.                                       #
#                                                                             #
#   Nprobe is distributed in the hope that it will be useful,                 #
#   but WITHOUT ANY WARRANTY; without even the implied warranty of            #
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the             #
#   GNU General Public License for more details.                              #
#                                                                             #
#   You should have received a copy of the GNU General Public License         #
#   along with Nprobe; if not, write to the Free Software                     #
#   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA #
#                                                                             #
###############################################################################


from urlparse import *
from  np_longutil import ul2l, ull2l
from nprobe import DUMMY_VALUE, LR_TS, LR_SCRIPTED, LR_ARCHIVE, LR_INLINE, \
     LR_LINK, LR_UNKNOWN, LR_REL_BASE, LR_END_BASE, LR_REDIRECT, \
     LR_SCRIPT_ARCHIVE, LR_SCRIPTED_LINK, LR_SCRIPTED_INLINE, \
     LR_SCRIPTED_UNKNOWN, LR_REL_SCRIPT_BASE, LR_END_SCRIPT_BASE, \
     LR_REDIRECT_INLINE, LR_REDIRECT_LINK, LR_REFRESH_URL, LR_REFRESH_SELF, \
     LR_REFRESH, LR_HAS_URL
from nprobe import CT_NON_OBJECT, CT_DUMMY
from nprobe import TRANS_VAL, TRANS_DUMMY_UNSYNCH, TRANS_DUMMY_ERR, \
     TRANS_ERR, TRANS_INCOMPLETE, TRANS_FINISHED
from nprobe import linkrec, _free

from print_col import whoops, inform


                        
                    
#############################################################################
#############################################################################

class Transaction:

    def __init__(self, trans, TConn, client, lookup, logfun=None, trace=0):
        """
        __init__ - build a Transaction from one nprobe HTTP transaction record

        trans  - the nprobe transaction record; read through its http_*()
                 and get_*() accessors
        TConn  - the connection object the transaction belongs to; its
                 ncgood/nsgood counters and translist are updated here
        client - the owning client object; its trans_invalid counter is
                 incremented when neither side of the transaction is valid
        lookup - callable applied to TConn.server to obtain a host name
                 (handed on to set_url)
        logfun - callable taking a single message string; defaults to the
                 do-nothing f_null
        trace  - when true, anomalies are also reported on the terminal and
                 some of them wait for operator input before continuing

        If neither the client nor the server side is valid the object is
        marked self.valid = 0 and construction stops early, leaving the
        timing/URL attributes unset.  Otherwise self.valid is 1 unless a
        timing inconsistency is found.
        """

        self.trace = trace
        if logfun is None:
            logfun = self.f_null
        self.logfun = logfun

        def report_anomaly(msg):
            # In trace mode show the problem, dump the connection and wait
            # for the operator; the message is always logged afterwards.
            if trace:
                whoops(msg)
                TConn.printself_with_trans()
                raw_input('anything to continue...\n')
            logfun(msg)

        self.client = client
        self.servaddr = TConn.server
        self.connid = TConn.id
        self.connorder = trans.connorder
        self.abtmbase = TConn.abstart

        self.TConn = TConn

        self.reflink = None
        self.iscontainer = 0
        self.intree = 0
        self.isimplied = 0  # implied transaction for unseen referring object

        # Status words for the client and server halves, and the flags
        # derived from them (valid / unsynchronised dummy / error dummy).
        cstatus = self.cstatus = trans.http_cli_status()
        sstatus = self.sstatus = trans.http_serv_status()
        cvalid = self.cvalid = cstatus & TRANS_VAL
        svalid = self.svalid = sstatus & TRANS_VAL
        cunsync = cstatus & TRANS_DUMMY_UNSYNCH
        sunsync = sstatus & TRANS_DUMMY_UNSYNCH
        cerrdum = cstatus & TRANS_DUMMY_ERR
        serrdum = sstatus & TRANS_DUMMY_ERR
        cdum = self.cisdummy = cunsync | cerrdum
        sdum = self.sisdummy = sunsync | serrdum
        self.cerr = trans.http_cli_iserr()
        serr = self.serr = trans.http_serv_iserr()

        if not (cvalid or svalid):
            # Nothing usable on either side - record it and give up.
            self.valid = 0
            client.trans_invalid += 1
            conn_msg = 'TCPConn #%d Transaction invalid:' % (TConn.id)
            client_msg = 'WebClient #%s Transaction invalid: %d' % \
                         (client.host_str(), TConn.id)
            if trace:
                inform(conn_msg)
            logfun(conn_msg)
            logfun(client_msg)
            return

        if cdum or sdum:
            # Dummy transactions are logged but still processed.
            conn_msg = 'TCPConn #%d Dummy Transaction:' % (TConn.id)
            client_msg = 'WebClient #%s Dummy Transaction: %d' % \
                         (client.host_str(), TConn.id)
            if trace:
                inform(conn_msg)
            logfun(conn_msg)
            logfun(client_msg)

        self.set_url(trans, TConn, lookup)
        self.uac = -1

        self.reqtype = trans.http_meth()

        # by now cvalid or svalid
        if cvalid:
            if not cdum:
                TConn.ncgood += 1
            self.relreqstart = ul2l(trans.http_reqstart_us())
            self.relreqend = ul2l(trans.http_reqend_us())
            if self.relreqstart < 0 or self.relreqend < 0:
                report_anomaly('TCPConn #%d Req client negative offset:'
                               % (TConn.id))
            self.uagent = trans.get_uagent() or 'Unknown'
            self.cvia = trans.get_cvia()
            self.cobtype = trans.http_c_obtype()
        else:
            # No request seen: use the reply start for both request times.
            self.relreqstart = self.relreqend = ul2l(trans.http_repstart_us())
            self.uagent = 'Unknown'
            # Defaults so that these attributes exist whichever side is
            # valid (they mirror the server-side defaults further down).
            self.cvia = None
            self.cobtype = CT_NON_OBJECT

        TConn.translist.append(self)

        if svalid:
            if not sdum:
                TConn.nsgood += 1
            self.relrepstart = ul2l(trans.http_repstart_us())
            self.relrepend = ul2l(trans.http_repend_us())
            if self.relrepstart < 0 or self.relrepend < 0:
                report_anomaly('TCPConn #%d Rep server negative offset:'
                               % (TConn.id))
            self.retcode = trans.http_server_retcode()
            self.nbytes = trans.http_obj_bytes()
            self.hdrlen = trans.http_r_hdrlen()
            self.npkts = trans.http_obj_pkts()
            self.server = trans.get_server()
            self.svia = trans.get_svia()
            self.sobtype = trans.http_s_obtype()
            self.finger = trans.sfinger()
        else:
            # not svalid - no reply seen: pin reply times to the request end
            self.relrepstart = self.relrepend = self.relreqend
            self.retcode = 0
            self.sobtype = CT_NON_OBJECT
            self.nbytes = self.hdrlen = 0
            self.npkts = 0
            self.server = None
            self.svia = None
            self.finger = (0, 0, 0)

        # get a pointer to the links buffer
        self.linkstr = trans.get_links_buf()
        self.linkstrlen = trans.get_links_buflen()
        if self.linkstrlen:
            self.iscontainer = 1
            # keep it when the trans object is freed
            trans.hold_links = 1

        if self.relrepend > (TConn.abclose - TConn.abstart):
            report_anomaly(
                'TCPConn #%d Transaction finishes after connection close:'
                % (TConn.id))

        # Timing sanity checks; any failure marks the transaction invalid.
        goofs = []
        if cvalid and not (self.relreqstart <= self.relreqend):
            goofs.append('TCPConn #%d TIMING GOOF -  cvalid and not (self.relreqstart <= self.relreqend):' % (TConn.id))
        if svalid and not (self.relrepstart <= self.relrepend):
            goofs.append('TCPConn #%d TIMING GOOF - svalid and not (self.relrepstart <= self.relrepend):' % (TConn.id))
        if cvalid and svalid \
           and self.relreqend > self.relrepstart \
           and not (serr or serrdum or sunsync or cunsync or cerrdum):
            goofs.append('TCPConn #%d TIMING GOOF - self.relreqend > self.relrepstart and not (serr or serrdum or sunsync):' % (TConn.id))

        if goofs:
            for msg in goofs:
                logfun(msg)
                if trace:
                    whoops(msg)
            if trace:
                TConn.printself_with_trans()
            self.valid = 0
        else:
            self.valid = 1

        # Was trans successful?
        if (sstatus & TRANS_INCOMPLETE) \
               and not (sstatus & TRANS_FINISHED):
            self.isfail = 1
        else:
            self.isfail = 0

##     def __del__(self):
##         print 'Freeing Transaction'
##         if self.linkstrlen:
##             print 'Freeing links'
##             _free(self.linkstr)
        
#############################################################################

    def set_url(self, trans, TConn, lookup):
        """
        set_url - derive obname, host, absurl, portstr and referrer

        trans  - transaction record supplying getreq(), gethost(), getref()
        TConn  - connection object (server address and port are used when
                 the request carries neither a Host nor an absolute URL)
        lookup - callable applied to TConn.server to obtain a host name

        For an invalid, dummy or errored client side all five attributes
        are set to ''.  Otherwise the request URL is made absolute with an
        explicit port and a non-empty path, and the referrer (if any) is
        normalised the same way.  self.portstr always has the form ':port'.
        """

        def with_port(netloc, default_ps):
            # Return (netloc, portstr).  A netloc of the form 'host:port'
            # is returned as-is with portstr ':port'; anything else gets
            # default_ps appended.  The leading ':' matters because callers
            # append portstr directly to port-less network locations.
            fields = netloc.split(':')
            if len(fields) == 2:
                return netloc, ':' + fields[1]
            return netloc + default_ps, default_ps

        def getabsurl():
            # '\32' is an octal escape (the single character 0x1A), which
            # replaces any spaces in the request string.
            obname = self.obname = trans.getreq().replace(' ', '\32')
            host = trans.gethost()

            if not obname:
                obname = 'NO_URL'
                msg = 'TCPConn #%d OBNAME GOOF: trans %d' % \
                      (self.connid, self.connorder)
                whoops(msg)
                self.logfun(msg)
                TConn.printself_with_trans()

            p = list(urlparse(obname, 'http'))

            if not host:
                if not p[1]:
                    # just a relative url
                    # must assume going to origin server
                    host = lookup(TConn.server)
                    ps = ':%d' % (TConn.serverport)
                    p[1] = host + ps
                else:
                    p[1], ps = with_port(p[1], ':80')
            else:
                host, ps = with_port(host, ':80')
                if not p[1]:
                    p[1] = host
                else:
                    # A port-less URL inherits the Host port; an explicit
                    # port in the URL takes over as the port string.
                    p[1], ps = with_port(p[1], ps)
                    if host != p[1]:
                        msg = 'TCPConn #%d URL/Host contradiction trans %d'\
                              % (self.connid, self.connorder)
                        if self.trace:
                            inform(msg)
                        self.logfun(msg)

            if not p[2]:
                p[2] = '/'

            absurl = urlunparse(p)
            self.portstr = ps
            return (obname, host, absurl)

        #
        # Main Fn
        #

        if (not self.cvalid) \
           or (self.cstatus & (TRANS_DUMMY_UNSYNCH | TRANS_DUMMY_ERR)) \
           or (self.cstatus & TRANS_ERR):
            # unsynch or error dummy trans
            self.obname = ''
            self.absurl = ''
            self.host = ''
            self.referrer = ''
            self.portstr = ''
            msg = 'TCPConn #%d Client transaction invalid/error:' % (self.connid)
            if self.trace:
                inform(msg)
            self.logfun(msg)
            return

        self.obname, self.host, self.absurl = getabsurl()
        referrer = trans.getref()
        if referrer:
            u = list(urlparse(referrer.replace(' ', '\32'), 'http'))
            u[1] = with_port(u[1], ':80')[0]
            if not u[2]:
                u[2] = '/'
            self.referrer = urlunparse(u)
        else:
            self.referrer = ''
        
#############################################################################


#
# Build dictionaries of inlined, linked and unknown (?) URLs
# - must be called after time offsets adjusted
# returns 1 if redirect link contained
#
            

    def get_links(self):
        """
        get_links(self) - walk the links buffer and record every URL found

        Each link record is resolved against the current base URL (base
        scopes may nest and are kept on a stack), given an explicit port
        and path, and entered into both self.client.linkdict and
        self.ob.ldict.  Dictionary values are [type-bits, [LinkRec, ...]];
        repeats of the same link from this transaction are noted as
        subsidiary entries on the existing LinkRec.

        Returns 1 if a redirect link was seen, otherwise 0.

        NOTE(review): reads self.ob and self.repstart, which __init__ does
        not assign - presumably set by the caller once time offsets have
        been adjusted; confirm.
        """

        gotr = 0
        rurl = None

        if not self.svalid:
            self.relrepstart = self.relrepend = self.relreqend
            self.links = {}
            return gotr

        # Stack of possibly nested base scopes
        base = self.absurl
        ps = self.portstr
        bstack = [(base, ps)]
        links = self.linkstr
        charslen = self.linkstrlen
        indx = 0
        link = linkrec()

        while indx < charslen:  # step through links buffer
            indx = link.next_lrec(indx, links)  # populates link
            ltype = link.type

            if ltype == LR_TS:
                continue

            if ltype & LR_REL_BASE:
                # Base PUSH
                u = list(urlparse(link.url, 'http'))
                h = u[1].split(':')
                if len(h) != 2:
                    # no port no.
                    ps = ':80'
                    u[1] += ps
                else:
                    # keep the ':' - ps is appended verbatim to
                    # port-less network locations below
                    ps = ':' + h[1]
                base = urlunparse(u)
                bstack.append((base, ps))
                continue

            if ltype & LR_END_BASE:
                # Base POP - the outermost scope is never removed
                if len(bstack) > 1:
                    bstack.pop()
                else:
                    msg = 'TCPConn #%d Base stack empty:' % (self.connid)
                    if self.trace:
                        whoops(msg)
                    self.logfun(msg)
                base, ps = bstack[-1]
                continue

            # must be a link/inline/unknown URL
            self.ob.is_referrer += 1

            url = urljoin(base, link.url)
            if not url:
                continue
            p = urlparse(url)
            if len(p[1].split(':')) != 2:
                # no port no.
                p = list(p)
                p[1] += ps
                url = urlunparse(p)
            if not p[2]:
                p = list(p)
                p[2] = '/'
                url = urlunparse(p)

            rec = LinkRec(link)
            rec.trans = self
            rec.tm = rec.reltm + self.repstart
            rec.url = url

            if ltype & LR_REFRESH:
                if ltype == LR_REFRESH_SELF:
                    url = self.absurl
                ltype = LR_REFRESH
                rec.type = LR_REFRESH
                self.logfun('WebClient #%s Refresh: %s' % \
                            (self.client.host_str(), url))

            if gotr and url == rurl:
                ltype = rec.type = LR_REDIRECT_LINK

            if ltype & LR_REDIRECT:
                gotr = 1
                rurl = url

            #
            # This is a PATCH - *.css shows wrong link type
            #
            if url[-4:] == '.css':
                ltype &= ~LR_LINK
                ltype |= LR_INLINE
                rec.type = ltype

            sub_added = 0
            for ldict in (self.client.linkdict, self.ob.ldict):
                try:
                    entry = ldict[url]
                except KeyError:
                    # first sighting of this URL
                    ldict[url] = [ltype, [rec]]
                    rec.target = None
                    rec.subs = []
                    continue

                entry[0] |= ltype
                last = entry[1][-1]
                if last.trans is self and last.type == ltype:
                    if not sub_added:
                        # add subsidiary record
                        # fields are link-seen time, link type and target
                        last.subs.append([rec.tm, ltype, None])
                        sub_added = 1
                else:
                    entry[1].append(rec)
                    rec.subs = []
                    rec.target = None

        return gotr
        
#############################################################################

    def is_container(self):
        """
        is_container(self) - did this transaction successfully deliver an
        object in which links were found

        True only when the object's is_referrer count is non-zero, the
        return code is 200 and the transaction did not fail.
        """

        refs = self.ob.is_referrer
        if not refs:
            # hand back the falsy count itself, as an `and` chain would
            return refs
        if not (self.retcode == 200):
            return False
        return not self.isfail
        
#############################################################################

    def is_referrer(self):
        """
        is_referrer(self) - could this object contain links

        Yields the object's is_referrer count when that is non-zero;
        failing that, true only for a non-200 reply whose server object
        type equals 1.
        """
        # XXX TODO - need better test - eg. could be a frame
        refs = self.ob.is_referrer
        if refs:
            return refs
        return self.retcode != 200 and self.sobtype == 1


        
#############################################################################


    def printself(self):
        """
        printself(self) - print the underlying transaction record

        NOTE(review): relies on self.trans and self.conn, neither of which
        is assigned by __init__ (the self.trans assignment there is
        commented out) - confirm that callers attach them first.
        """
        show = self.trans.printself
        show(self.conn.tconn)
        
#############################################################################
	

    def printself_long(self):
        """
        printself_long(self) - print this transaction record, a separator,
        the connection, and then every entry in the connection's translist

        NOTE(review): relies on self.trans and self.conn, neither of which
        is assigned by __init__ - confirm that callers attach them first.
        """
        show = self.trans.printself
        conn = self.conn
        show(conn.tconn)
        print('***************************************************************')
        conn.tconn.printself()
        for entry in conn.translist:
            entry.printself(conn.tconn)
        
#############################################################################

    def print_links(self):
        """
        print_links(self) - placeholder; currently does nothing
        """
        return None

## 	def sort_link_by_time(a, b):
## 	    return int(a[1][0] - b[1][0])

## 	print 'Object %s' % (self.absurl)

## 	linklist = self.inlines.items()
## 	linklist.sort(sort_link_by_time)
## 	print 'In-line:-'
## 	for link in linklist:
## 	    print '%s (x%d) %s' % (link[0], link[1][1], 
## 				   tsLongstring(link[1][0]))

## 	linklist = self.links.items()
## 	linklist.sort(sort_link_by_time)
## 	print 'Links:-'
## 	for link in linklist:
## 	    print '%s (x%d) %s' % (link[0], link[1][1], 
## 				   tsLongstring(link[1][0]))

## 	linklist = self.unknowns.items()
## 	linklist.sort(sort_link_by_time)
## 	print 'Unknowns:-'
## 	for link in linklist:
## 	    print '%s (x%d) %s' % (link[0], link[1][1], 
## 				   tsLongstring(link[1][0]))

	
	
#############################################################################

    def f_null(self, arg):
        """
        f_null(self, arg) - do-nothing log function; arg is discarded

        Used as the logger when __init__ is given no logfun.
        """
        return None

#############################################################################
#############################################################################

class DummyTransaction(Transaction):
    """
    Implied transaction standing in for a referring object that was not
    itself observed.

    Transaction.__init__ is deliberately not called: every attribute is
    filled in directly from the constructor arguments.
    """

    def __init__(self, obname, ob, time, connid, uac):
        """
        obname - name recorded for the implied object
        ob     - object this transaction stands for; it is flagged as a
                 referrer and supplies the absolute URL
        time   - single timestamp used for all four request/reply times
        connid - connection id; stored negated
        uac    - stored unchanged as self.uac
        """
        self.obname = obname
        self.ob = ob
        self.uac = uac

        # The implied object is by definition a referring container.
        ob.is_referrer = 1
        self.iscontainer = 1

        # XXX TODO adjust for intervening RTT's
        self.reqstart = time
        self.reqend = time
        self.repstart = time
        self.repend = time

        # There is no real connection behind this transaction.
        self.connid = -connid
        self.TConn = None

        # Both directions count as valid.
        self.cvalid = 1
        self.svalid = 1

        # Empty link bookkeeping.
        self.followed = []
        self.inlines = {}
        self.links = {}
        self.unknowns = {}

        self.isimplied = 1
        self.servaddr = None
        self.absurl = ob.absurl
        self.sobtype = CT_DUMMY

        # Nothing was actually transferred.
        self.nbytes = 0
        self.hdrlen = 0

        self.reflink = None
        self.downlinks = []

	
	
#############################################################################
#############################################################################

## class LinkRec(linkrec):

##     def __init__(self):
        
##         linkrec.__init__(self)
##         self.trans = None
##         self.target = None
##         self.subs = None

##     def get_url(self):

##         return self.url

    
##     def __repr__x(self):

##         from np_WebHost import LinkDict
##         link_str = LinkDict().link_str

##         str = 'LinkRec type %x %s\n' % (self.type,
##                                         link_str(self.type))
##         str += 'URL: %s\n' % (self.get_url())
##         if hasattr(self, 'trans'):
##             str+= ' Referrer %d' % (self.trans.order)
##         #if hasattr(self.target, 'order'):
##             #str += ' Target %d' % (self.target.order)
##         if self.target:
##             str += ' Target %d' % (self.target.order)
##         else:
##             str += ' No target'
##         if self.type & LR_REFRESH:
##             str += ' Period: %d\n' % (self.per)
##         if hasattr(self, 'tm'):
##             str += ' Abs tm %ld.%.6ldus\n' % (self.tm/1000000, self.tm%1000000)
##         str += ' Rel tm %ld.%.6ldus\n' % (self.reltm/1000000,
##                                              self.reltm%1000000)
##         if self.subs:
##             print 'Subs', self.subs

##         return str

	
	
#############################################################################
#############################################################################

class LinkRec:
    """
    Record of one link found in a delivered object.

    trans, target and subs start as None.  reltm, type and per are copied
    from the link record passed to the constructor (and are left unset if
    none is given); url and tm are assigned afterwards by whoever creates
    the record.
    """

    def __init__(self, lr):
        """
        lr - link record providing reltm, type and per; may be false, in
             which case those three attributes are not created
        """

        self.trans = None
        self.target = None
        self.subs = None

        if lr:
            self.reltm = lr.reltm
            self.type = lr.type
            self.per = lr.per

    def get_url(self):
        """Return self.url (raises AttributeError if not yet assigned)."""

        return self.url


    def __repr__x(self):
        """
        Build a multi-line description of the record.

        Any subsidiary entries are written to standard output rather than
        being included in the returned string.
        """

        import sys
        from np_WebHost import LinkDict
        link_str = LinkDict().link_str

        text = 'LinkRec type %x %s\n' % (self.type,
                                         link_str(self.type))
        text += 'URL: %s\n' % (self.get_url())
        # trans always exists (set to None in __init__), so test its value
        # rather than its presence before dereferencing it.
        if self.trans is not None:
            text += ' Referrer %d' % (self.trans.order)
        if self.target:
            text += ' Target %d' % (self.target.order)
        else:
            text += ' No target'
        if self.type & LR_REFRESH:
            text += ' Period: %d\n' % (self.per)
        # tm is only present once an absolute time has been assigned
        if hasattr(self, 'tm'):
            text += ' Abs tm %ld.%.6ldus\n' % divmod(self.tm, 1000000)
        text += ' Rel tm %ld.%.6ldus\n' % divmod(self.reltm, 1000000)
        if self.subs:
            sys.stdout.write('Subs %s\n' % (self.subs,))

        return text
	
	
#############################################################################
#############################################################################
