#! /usr/bin/env python
###############################################################################
#                                                                             #
#   Copyright 2005 University of Cambridge Computer Laboratory.               #
#                                                                             #
#   This file is part of Nprobe.                                              #
#                                                                             #
#   Nprobe is free software; you can redistribute it and/or modify            #
#   it under the terms of the GNU General Public License as published by      #
#   the Free Software Foundation; either version 2 of the License, or         #
#   (at your option) any later version.                                       #
#                                                                             #
#   Nprobe is distributed in the hope that it will be useful,                 #
#   but WITHOUT ANY WARRANTY; without even the implied warranty of            #
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the             #
#   GNU General Public License for more details.                              #
#                                                                             #
#   You should have received a copy of the GNU General Public License         #
#   along with Nprobe; if not, write to the Free Software                     #
#   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA #
#                                                                             #
###############################################################################


import commands
import errno
import getopt
import os
import re
import sys
from math import sqrt
from socket import inet_aton
from sys import argv
from urlparse import urlparse

import Numeric

import np_warnings

from np_treestats import NOB_THRESH

from np_TCPConn import D_BIGDEL

from nprobe import CT_TEXT_HTML, CT_TEXT_XML, http_server_objtype_string

from histo import Histogram, HistogramError

# Number of buckets in each accumulator's servers-per-page distribution.
N_SERV_BKTS = 1000

# Report lines queued by rep() and emitted together by rep_rep().
replist = []

def rep(s):
    """Queue the line s on replist for the end-of-run report."""
    replist.append(s)

def rep_now(s):
    """Print s immediately and also queue it for the end-of-run report."""

    print s
    rep(s)

def rep_per(args):
    """Queue a headline count followed by its parts as percentages of it.

    args -- sequence of (value, label) pairs.  The first pair is the
            headline; each later pair with a non-zero value is reported
            with its share of the headline value.

    Nothing at all is queued when the headline value is zero.
    """
    total, total_label = args[0]
    if total:
        rep('%d %s:' % (total, total_label))
        scale = 100.0/total
        for count, label in args[1:]:
            if not count:
                continue
            rep('\t%d %s (%.2f%%)' % (count, label, count*scale))

def rep_rep():
    try:
        sf = open(sumfilenm, 'w')
    except IOError, s:
        print 'Couldn\'t open summary file', s
        sys.exit(1)
        
    for s in replist:
        print s
        sf.write(s + '\n')
    
               

# Names of the scalar counters carried by every accum instance: accum.__init__
# zeroes each of them and accum_tot() sums them field by field.
acc_fields = [
    'totpages', 'nobs', 'nconns',
    'npdel', 'npbdel', 'npbdel85', 'no_del_del',
    'sp', 'lp',
    'unlinked', 'refr', 'inv', 'rvst',
    ]

class accum:
    """Counters for one category of page download.

    Holds the scalar counters named in acc_fields, three dicts (ud, sd, cd)
    keyed by URL, server and client whose values are four-element count
    lists, and nservd, an array of N_SERV_BKTS buckets indexed by the
    number of servers contributing to a page.

    report() additionally reads nurl, nserv, ncli, nsdel, nsbdel, nudel and
    nubdel, which are not set here; the caller must assign them first.
    """

    def __init__(self, what):
        """what -- heading line queued first when the counters are reported."""
        self.what = what
        for name in acc_fields:
            setattr(self, name, 0)
        self.ud, self.sd, self.cd = {}, {}, {}
        self.nservd = Numeric.zeros(N_SERV_BKTS,)

    def report(self):
        """Queue the summary of this accumulator through rep()/rep_per()."""
        rep(self.what)
        headline = (self.nserv, self.ncli, self.nobs, self.nconns)
        rep('%d servers %d clients %d obs %d conns' % headline)
        rep_per([(self.nurl, 'URLs')])

        # Page downloads, split by size class and then by whatever
        # optional categories actually occurred.
        page_per = [(self.totpages, 'Page downloads')]
        page_per.append((self.sp, 'lt %d obs' % (nob_thresh)))
        page_per.append((self.lp, 'gt %d obs' % (nob_thresh)))

        single = self.unlinked+self.refr+self.inv+self.rvst
        if single:
            page_per.extend([
                (single, 'single objects not in trees'),
                (self.unlinked, 'unlinked'),
                (self.refr, 'refreshes'),
                (self.rvst, 'revisits'),
                (self.inv, 'invalid'),
                ])
        if self.npdel:
            page_per.extend([
                (self.npdel, 'delayed'),
                (self.npbdel, 'long delayed'),
                (self.npbdel85, 'long delayed at 85%'),
                ])
        if self.no_del_del:
            page_per.append((self.no_del_del, 'delays not adding to total'))
        rep_per(page_per)

        servers = [(self.nserv, 'Servers'),
                   (self.nsdel, 'delayed'),
                   (self.nsbdel, 'long delayed')]
        rep_per(servers)

        urls = [(self.nurl, 'URLs'),
                (self.nudel, 'delayed'),
                (self.nubdel, 'long delayed')]
        rep_per(urls)

        rep('Distribution of servers/page')
        for bucket in range(N_SERV_BKTS):
            npages = self.nservd[bucket]
            if not npages:
                continue
            rep('  %d %12d' % (bucket, npages))
        rep('\n')


def accum_tot(a, b, c):
    """Set accumulator c to the combined totals of accumulators a and b.

    a, b -- source accumulators; they are left unmodified.
    c    -- destination accumulator.  Its ud, sd and cd dicts are replaced,
            and its nservd buckets and acc_fields counters are overwritten
            with the element-wise sums of those in a and b.
    """

    def merged(da, db):
        # Build a fresh dict with fresh four-element count lists.  Reusing
        # a's lists (as dict.update would) makes the += below write b's
        # counts straight into a's entries for every key they share.
        out = {}
        for src in (da, db):
            for key, counts in src.items():
                tot = out.setdefault(key, [0, 0, 0, 0])
                for i in range(4):
                    tot[i] += counts[i]
        return out

    c.ud = merged(a.ud, b.ud)
    c.sd = merged(a.sd, b.sd)
    c.cd = merged(a.cd, b.cd)

    for i in range(N_SERV_BKTS):
        c.nservd[i] = a.nservd[i] + b.nservd[i]

    for f in acc_fields:
        setattr(c, f, getattr(a, f) + getattr(b, f))

def do_pagestuff(sd):

    def dl_accum(d, s, e):

        a = [0]*(e-s+1)
        for v in d.values():
            n = 0
            for i in range(s, e+1):
                a[n] += v[i]
                n += 1
        return a

    def by_0(a, b):
        return int(b[0]-a[0])

    def by_1(a, b):
        return int(b[1]-a[1])

    def by_2(a, b):
        return int(b[2]-a[2])

    def by_3_0(a, b):
        return int(b[3][0]-a[3][0])

    def by_3_1(a, b):
        return int(b[3][1]-a[3][1])

    def by_3_2(a, b):
        return int(b[3][2]-a[3][2])

    def by_4_1(a, b):
        return int(b[4][1]-a[4][1])

    def by_4_2(a, b):
        return int(b[4][2]-a[4][2])

    def by_5_1(a, b):
        return int(b[5][1]-a[5][1])

    def by_5_2(a, b):
        return int(b[5][2]-a[5][2])

    def by_7(a, b):
        return long(b[7]-a[7])

    def add_3(a, b):
        a[0] += b[0]
        a[1] += b[1]
        a[2] += b[2]

    def object_type_string(otype):
    
        return http_server_objtype_string(otype).replace('/', '-')

        

    slist = []
    tot_bytes = 0
    tot_disc_p = tot_disc_o = tot_disc_oo = 0

    allptots = [0, 0, 0]
    allotots = [0, 0, 0]
    allootots = [0, 0, 0]

    plinksd = {}
    intlinksd = {}
    otypesd = {}

    uhist = Histogram(lower=0, bucketsz=1)

    page_n = 0
    
    for s, (hdd, pd, od, odd) in sd.items():
        ptots = dl_accum(pd, 1, 4)
        add_3(allptots, ptots)
        otots = dl_accum(od, 0, 3)
        add_3(allotots, otots)
        ootots = dl_accum(odd, 0, 3)
        add_3(allootots, ootots)
        totb = ptots[-1] + otots[-1] + ootots[-1]
        tot_bytes += totb
        disc_p = len(pd)
        tot_disc_p += disc_p
        disc_o = len(od)
        tot_disc_o += disc_o
        disc_oo = len(odd)
        tot_disc_oo += disc_oo
        slist.append((disc_p, disc_o, disc_oo, ptots, otots, ootots, s, totb))

        for p in pd.values():
            page_n += 1
            uhist.add(p[8])
            ld = p[0] # links to page
            for ltype, n in ld.items():
                plinksd[ltype] = plinksd.setdefault(ltype, 0) + n

            ld = p[5] # links within page
            for ltype, (max_ndisc, nd_disc, followed, dups) in ld.items():
                ent = intlinksd.setdefault(ltype, (Histogram(lower=0, bucketsz=1), Histogram(lower=0, bucketsz=1), Histogram(lower=0, bucketsz=1), Histogram(lower=0, bucketsz=1)))
                for h, v, div in [
                    (ent[0], max_ndisc, 0),
                    (ent[1], nd_disc, 1),
                    (ent[2], followed, 1),
                    (ent[3], dups, 1)
                    ]:
                    if v:
                        if div:
                            v = v/p[3]
                        h.add(v)
            otypes = p[7]
            for ot, n in otypes.items():
                h = otypesd.setdefault(ot, Histogram(lower=0, bucketsz=1))
                h.add(n)
            
                

    slist.sort()
    slist.reverse()

    for lab, srt, f1, f2, tot in [
       ('discrete page references', by_0, 0, None, tot_disc_p),
       ('discrete object references', by_1, 1, None, tot_disc_o),
       ('discrete others object references', by_2, 2, None, tot_disc_oo),
       ('pages encountered', by_3_0, 3, 0, allptots[0]),
       ('pages requested', by_3_1, 3, 1, allptots[1]),
       ('pages downloaded', by_3_2, 3, 2, allptots[2]),
       ('objects requested', by_4_1, 4, 1, allotots[1]),
       ('objects downloaded', by_4_2, 4, 2, allotots[2]),
       ('others objects requested', by_5_1, 5, 1, allootots[1]),
       ('others objects downloaded', by_5_2, 5, 2, allootots[2]),
#       ('bytes downloaded', by_7, 7, None, tot_bytes)
       ]:
        rep('top servers by %s:\n' % (lab))
        slist.sort(srt)
        rest_val = 0
        rest_pc = 0.0
        for stuff in slist:
            if f2 == None:
                val = stuff[f1]
            else:
                val = stuff[f1][f2]
            pc = (val*100.0)/tot
            if pc >= 2.5:
                rep('\t%s %d (%.2f%%)' % (stuff[6], val, pc))
            else:
                rest_val += val
                rest_pc += pc
        rep('\tOther %d (%.2f%%)' % (rest_val, rest_pc))
        rep('\n')

    # links to pages
    rep('Link types to pages encountered:\n')
    pagelinks = [(n, ltype) for ltype, n in plinksd.items()]
    pagelinks.sort()
    pagelinks.reverse()
    rest_val = 0
    rest_pc = 0.0
    for n, ltype in pagelinks:
        pc = (n*100.0)/allptots[0]
        if pc >= 2.5:
            rep('0x%x %d (%.2f%%)' % (ltype, n, pc))
        else:
            rest_val += n
            rest_pc += pc
    rep('Other %d (%.2f%%)' % (rest_val, rest_pc))
    rep('\n')

    
    linksdir = os.path.join(basedir, 'links_data')
    
    try:
        os.makedirs(linksdir)
    except OSError,s:
        if str(s).find('File exists') < 0:
            raise

    intlinks = [(lt, lhists) for lt, lhists in intlinksd.items()]
    intlinks.sort()
    comm = 'First column is number of links, second is No. pages occurring'

    for lt, hists in intlinks:
        lts = '0x%x' % (lt)
        for h, fn, tit in [
            (hists[0], 'oa_max', 'max over all sightings of a page'),
            (hists[1], 'ave_seen', 'average No. over page downloads'),
            (hists[2], 'ave_followed', 'average No. followed'),
            (hists[3], 'ave_duplicated', 'average No. duplicated')
            ]:
            fnm = os.path.join(linksdir, '%s-%s' % (lts, fn))
            #f = open(fnm, 'w')
            try:
                h.results(zeros=0, file=fnm, title=tit, comment=comm)
            except HistogramError, s:
                if str(s).find('No samples presented') >= 0:
                    continue
                else:
                    raise

    fnm = os.path.join(linksdir, 'disc_urls')
    try:
        uhist.results(zeros=0, file=fnm, title='disc_urls', comment='Max number of discrete URL links of all types over all sightings of a page\n - first column is number of URLs, second is No. pages occurring')
    except HistogramError, s:
        if str(s).find('No samples presented') < 0:
            raise

    
    typesdir = os.path.join(basedir, 'types_per_page')
    
    try:
        os.makedirs(typesdir)
    except OSError,s:
        if str(s).find('File exists') < 0:
            raise

    for type, h in otypesd.items():
        typestr = object_type_string(type)
        fnm = os.path.join(typesdir, typestr)
        try:
            h.results(zeros=0, file=fnm, comment='Distribution of downloaded object types per page\n - first column is number of obs. of the type, second column is No. pages occuring', title='Downloaded types distribution')
        except HistogramError, s:
            if str(s).find('No samples presented') < 0:
                raise 
        


scriptname = os.path.basename(argv[0])
ofnm = None
nob_thresh = NOB_THRESH

try:
    optlist, args = getopt.getopt(sys.argv[1:], 'o:n:')

except getopt.error, s:
    print '%s: %s' % (scriptname, s)
    usage(scriptname)
    sys.exit(1)

for opt in optlist:
    if opt[0] == '-o':
        ofnm = opt[1]
    if opt[0] == '-n':
        nob_thresh = opt[1]

start_re = re.compile('# Run start = ([0-9]*).*')
start = 0

accums = [accum('NOT IN TREES:'), accum('IN_TREES:'), accum('TOTAL:')]

basedir = os.path.dirname(args[0])
basedir = os.path.join(basedir, 'Page_results')

try:
    os.makedirs(basedir)
except OSError,s:
    if str(s).find('File exists') < 0:
        raise

# Work out ofnm, the base name of the output files, from the input file
# names, and from it outfbase, the full path prefix inside basedir.

frange = []
suffs = []
# <char>.rep.<digits>-<char>.rep.<digits><.suffix> : a name that already
# spans a range of rep files
pref_re = re.compile('(.\.rep\.\d*)-(.\.rep\.\d*)(\..*)')
print 'files from', os.getcwd(), ':'
rep('files from %s:' % (os.getcwd()))
for fnm in args:
    fnm = os.path.basename(fnm)
    print '   ', os.path.basename(fnm)
    m = pref_re.match(fnm)
    if m:
        #print m.group(1), m.group(2), m.group(3)
        # collect both ends of the range and the trailing suffix
        frange.append(m.group(1))
        frange.append(m.group(2))
        suffs.append(m.group(3))

# NOTE(review): both branches below assign ofnm unconditionally when their
# pattern matches, so a name given with -o is overridden in those cases -
# confirm whether that is intended.
if frange:
    for suff in suffs[1:]:
        if suff != suffs[0]:
            print 'Ouch mixed suffix:', suff
    # NOTE(review): this is a string sort, so '.rep.10' orders before
    # '.rep.9' - confirm the numbering never makes that matter.
    frange.sort()
    # overall range: lowest start to highest end, with the first suffix
    ofnm = frange[0] + '-' + frange[-1] + suffs[0]
    #print frange

else:
    # fnm is left over from the loop above, i.e. the last file named
    pref_re = re.compile('(.\.rep\.\d*)\.(.*)') 
    m = pref_re.match(os.path.basename(fnm))
    if m: 
        #print m.group(1), m.group(2)
        # group(0) is the whole matched name
        ofnm = m.group(0)

if not ofnm:
    # fall back to the (last) file name itself if it contains '.Pages'
    pref_re = re.compile('.*\.Pages') 
    m = pref_re.match(os.path.basename(fnm))
    if m:
        ofnm = os.path.basename(fnm)

if not ofnm:
    # last resort: ask, and use the last file name if nothing is entered
    ofnm = raw_input('output files base? - base is %s\n?' % (basedir))
    if not ofnm:
        ofnm = os.path.basename(fnm)

# drop a single leading '-' from the base name
if ofnm[0] == '-':
    ofnm = ofnm[1:]
    
outfbase = os.path.join(basedir, ofnm)
#print 'writing to'
rep('Output written to:')

writes = [['', []], ['.lt%d' % (nob_thresh), []], ['.gt%d' % (nob_thresh), []]]
ofnms = []
for suff2, write in writes:
    for suff1 in ['.dur', '.dur85', '.ndur', '.ndur85', '.del', '.del85', '.acc_del', '.ddf']:
        outfnm = outfbase + suff1 + suff2
        ofnms.append(outfnm)
        try:
            f = open(outfnm, 'w')
        except IOError, s:
            print 'Couldn\'t open data file', s
            sys.exit(1)
        write.append(f.write)
        #print 'writing to', os.path.basename(outfnm)
        #print outfnm
        rep(outfnm)

sumfilenm = outfbase + '.Summary'
#print 'Summary file is', sumfilenm
rep('Summary file is %s' % (sumfilenm))

# Totals over all input files: page records ('P' and the single-object
# kinds), object ('T') records, and lines with an unknown record type.
totp = toto = badl = 0

# Earliest and latest page time seen.
min_tm = 99999999999999999.9999
max_tm = 0

single_unlinked = 0

#
# State for gathering page and object download information
#

pagefirst = inobs = 1 # flags
namecache = {}        # server field of a record -> host name taken from a URL

#
# Layout of the data gathered under servs['P'], keyed by (URL) host:
#
#   host -> (0/host data, 1/page dict, 2/object dict, 3/others object dict)
#
#   host data   = {server field of the page record: times seen}
#   page dict   = {URL path of page:
#                    [0/{link type to page: count}, 1/times seen,
#                     2/times requested, 3/times downloaded,
#                     4/accumulated bytes,
#                     5/{link type in page: [max, and three running sums]},
#                     6/{object type of the page itself: count},
#                     7/{downloaded object type: count},
#                     8/max discrete URL links in page]}
#   object dict = {URL path of object:
#                    [0/times seen, 1/times requested, 2/times downloaded,
#                     3/accumulated bytes, 4/{object type: count}]}
#   others object dict = same layout as object dict, for objects whose host
#                    differs from the host of the page they belong to
#

servs = {}
for rectype in ['T', 'P', 'U', 'R', 'I', 'V']:
    servs[rectype] = {}
del rectype

line_err = 0

#
#
#
for fnm in args:
    
    if line_err == 1:
        s = replist.pop()
        rep_now('%s - **File truncated**' % (s))
    line_err = 0
    
    print os.path.basename(fnm)
    f = open(fnm, 'r')
    lno = -1
    for l in f.readlines():
        lno += 1 
        l = l.replace('\n', '')
        if l[0] == '#':
            m = None
            m = start_re.match(l)
            if m:
                strt = long(m.group(1))/1000000.0
                #print 'start = ', start
                if start == 0:
                    start = strt
                strt -= start
                print 'start is', start, 'offset is', strt
            if inobs:
                inobs = 0 # # denotes end of objects
                pass
            continue
        sf = l.rstrip().split(' ')
        intree = 1
        #print sf
        #url, cli, serv, by, no, nc, tm, dur, ndur, dur85, ndur85, dflags = sf
        rt = sf[0]
        if rt == 'T':
            if len(sf) < 14:
                rep_now('Bad line file %s line %d \"%s"' % (fnm, lno, l))
                line_err += 1
                continue
            toto += 1
            o_obno = int(sf[1])
            sf = sf[2:]
            o_url = sf[0]
            o_serv = sf[1]
            o_connorder = int(sf[2])
            o_nbytes = int(sf[3])
            o_ltype = int(sf[4], 16)
            o_type = int(sf[5])
            o_retcode = int(sf[6])
            o_tm = float(sf[7]) + strt
            o_finger = (int(sf[8]), int(sf[9]), int(sf[10]))
            o_iscont = int(sf[11])
            if o_iscont:
                o_alinks = int(sf[12])
                nltypes = int(sf[13])
                #assert len(sf) == 14 + nltypes*5
                if len(sf) != 14 + nltypes*5:
                    rep_now('Bad line file %s line %d \"%s"' % (fnm, lno, l))
                    line_err += 1
                    continue
                ldata = []
                for i in range(nltypes):
                    data = []
                    off = 14 + (i*5)
                    for j in range(5):
                        data.append(int(sf[off+j]))
                    ldata.append(data)
            else:
                #assert len(sf) == 12
                if len(sf) != 12:
                    rep_now('Bad line file %s line %d \"%s"' % (fnm, lno, l))
                    line_err += 1
                    continue
        elif rt == 'P':
            #assert len(sf) == 20
            if len(sf) != 20:
                rep_now('Bad line file %s line %d \"%s"' % (fnm, lno, l))
                line_err += 1
                continue
            totp += 1
            pagenum = int(sf[1])
            sf = sf[2:]
            url = sf[0]
            cli = sf[1]
            serv = sf[2]
            by = int(sf[3])
            no = int(sf[4])
            nc = int(sf[5])
            ltype = int(sf[6], 16)
            ctype = int(sf[7])
            nservs = int(sf[8])
            tm = float(sf[9]) + strt
            dur = int(sf[10])
            ndur = int(sf[11])
            dur85 = int(sf[12])
            ndur85 = int(sf[13])
            dflags = int(sf[14])
            acc_del = int(sf[15])
            delv = long(sf[16])
            cnt_del = int(sf[17])
            pagefirst = inobs = 1
            cont_seen = 0
        elif rt in ['U', 'R', 'I', 'V']:
            #assert len(sf) == 10
            if len(sf) != 10:
                rep_now('Bad line file %s line %d \"%s"' % (fnm, lno, l))
                line_err += 1
                continue
            totp += 1
            intree = 0
            url = sf[1]
            cli = sf[2]
            serv = sf[3]
            by = int(sf[4])
            ctype = int(sf[5])
            no = 1
            nc = 1
            tm = float(sf[6])/1000 + strt
            dur = int(sf[7])
            dur85 = dur
            ndur = int(sf[8])
            ndur85 = ndur
            dflags = int(sf[9])
            acc_del = cnt_del = dur - ndur
            delv = acc_del*acc_del
            nservs = 1

        ##     if ctype == CT_TEXT_HTML or ctype == CT_TEXT_XML:
##                 #single object page
##                 single_unlinked += 1
##                 intree = 1
##                 ltype = 0
            
        else:
            print 'pagesum - bad line %s %d: \"%s\"' % (fnm, lno, l)
            badl += 1
            continue

        if rt == 'T':
            up = urlparse(o_url)
            o_host = up[1].split(':')[0]
            if not o_host:
                try:
                    o_host = namecache[o_serv]
                except KeyError:
                    o_host = o_serv
            else:
                namecache[o_serv] = o_host
            obnm = up[2]
            if not obnm:
                obnm = 'NK'
                
            got = o_retcode == 200 or o_retcode == 206

            if pagefirst and o_host == host and obnm == page: #it's the page root container
                    #print 'xx',
                    pd[2] += 1
                    pd[4] += o_nbytes
                    if got:
                        pd[3] += 1
                        ptd = pd[6] # page object type dict
                        ptd[o_type] = ptd.setdefault(o_type, 0) + 1
                        cont_seen = 1
            else: # its a constituent object
                if o_host == host: # served by page host
                    od = hd[2]
                else:
                    od = servs['P'].setdefault(host, ({}, {}, {}, {}))[3]
                odd = od.setdefault(obnm, [0, 0, 0, 0, {}])
                #print 'XXX', 
                odd[0] += 1
                odd[1] += 1
                odd[3] += o_nbytes
                if got:
                    odd[2] += 1
                    ptd = odd[4]
                    ptd[o_type] = ptd.setdefault(o_type, 0) + 1

            if got:
                ptd = pd[7]
                ptd[o_type] = ptd.setdefault(o_type, 0) + 1

            if o_iscont and cont_seen:
                # add in page links data (transitive in case of frames)
                pd[8] = max(o_alinks, pd[8])
                dd = pd[5]
                for lt in ldata:
                    ld = dd.setdefault(lt[0], [0, 0, 0, 0])
                    ld[0] = max(ld[0], lt[1])
                    for i in range(2,5):
                        ld[i-1] += lt[i]
                
                
            
            pagefirst = 0

            
        else:

            if ndur == 0:
                continue



            #print rt, tm
            min_tm = min(tm, min_tm)
            max_tm = max(tm, max_tm)

            acc = accums[intree]
            acc.totpages += 1
            ue = acc.ud.setdefault(url, [0,0,0,0])
            se = acc.sd.setdefault(serv, [0,0,0,0])
            ce = acc.cd.setdefault(cli, [0,0,0,0])

            ue[0] += 1
            se[0] +=1
            ce[0] += 1

            dl = dur - ndur
            dl85 = dur85 - ndur85
            if dl:
                acc.npbdel += 1
                ue[1] += 1
                se[1] += 1
            if dl85:
                acc.npbdel85 += 1
                ue[2] += 1
                se[2] += 1
            if dflags:
                acc.npdel += 1
                ue[3] += 1
                se[3] += 1

            acc.nobs += no
            acc.nconns += nc
            #print totdur, durlessdel, tot85dur, dur85lessdel

            if not intree:
                if no != 1:
                    print 'Single object goof %d objects %s %d: %s' \
                          % (no, fnm, lno, l)
                if rt == 'U':
                    acc.unlinked += 1
                elif rt == 'R':
                    acc.refr += 1
                elif rt == 'I':
                    acc.inv += 1
                elif rt == 'V':
                    acc.rvst += 1
                else:
                    print 'Invalid non-tree reason %s %d: %s' % (fnm, lno, l)
                    sys.exit(1)

            write = writes[0][1]
            write[0]('%.3f\t%d\n' % (tm, dur))
            write[1]('%.3f\t%d\n' % (tm, dur85))
            write[2]('%.3f\t%d\n' % (tm, ndur))
            write[3]('%.3f\t%d\n' % (tm, ndur85))
            if dl:
                write[4]('%.3f\t%d\n' % (tm, dl))
            if acc_del and dur:
                write[6]('%.3f\t%.2f\n' % (tm, ((acc_del/no)*100.0)/ndur))
                write[7]('%.3f\t%.2f\n' % (    tm, (sqrt(delv/no))/ndur)   )
            if dl85:
                write[5]('%.3f\t%d\n' % (tm, dur85-ndur85))

            if no > nob_thresh:
                write = writes[2][1]
                acc.lp += 1
            else:
                write = writes[1][1]
                acc.sp += 1
            write[0]('%.3f\t%d\n' % (tm, dur))
            write[1]('%.3f\t%d\n' % (tm, dur85))
            write[2]('%.3f\t%d\n' % (tm, ndur))
            write[3]('%.3f\t%d\n' % (tm, ndur85))
            if dl:
                write[4]('%.3f\t%d\n' % (tm, dur-ndur))
            if acc_del and dur:
                write[6]('%.3f\t%.2f\n' % (tm, ((acc_del/no)*100.0)/ndur))
                write[7]('%.3f\t%.2f\n' % (tm, (sqrt(delv/no))/ndur))
            if dl85:
                write[5]('%.3f\t%d\n' % (tm, dur85-ndur85))

            if acc_del and not (dur-ndur):
                acc.no_del_del += 1

            acc.nservd[nservs] += 1

            # page/object info gathering

            if rt == 'P':
                up = urlparse(url)
                host = up[1].split(':')[0]
                if not host:
                    try:
                        host = namecache[serv]
                    except KeyError:
                        host = serv
                else:
                    namecache[serv] = host
                page = up[2]
                if not page:
                    page = 'NK'
                
                hd = servs['P'].setdefault(host, ({}, {}, {}, {}))
                hhd = hd[0]
                # accumulate IP addrs for this (URL) host
                hhd[serv] = hhd.setdefault(serv, 0) + 1

                #accumulate pages from this (URL) host
                pd = hd[1].setdefault(page, [{}, 0, 0, 0, 0, {}, {}, {}, 0])
                #print '%x' % ltype
                ld = pd[0]
                # accumulate link types to page
                ld[ltype] = ld.setdefault(ltype, 0) + 1
                # accumulate times seen
                pd[1] += 1




if not totp:
    print 'No pages in page file(s)'
    sys.exit(1)

print totp, 'pages'
print toto, 'objects'
print '%d/%d bad lines' % (badl, totp)
print 'times:', min_tm, max_tm

    
accum_tot(accums[0], accums[1], accums[2])

for i in [1, 0, 2]:
    acc = accums[i]

    acc.nurl = len(acc.ud)
    acc.nserv = len(acc.sd)
    acc.ncli = len(acc.cd)

    acc.nsdel = 0
    acc.nsbdel = 0
    for s in acc.sd.values():
        if s[1]:
            acc.nsbdel += 1
        if s[3]:
            acc.nsdel += 1

    acc.nudel = 0
    acc.nubdel = 0
    for u in acc.ud.values():
        if u[1]:
            acc.nubdel += 1
        if u[3]:
            acc.nudel += 1
 
    acc.report()

userv = 0
itsd = accums[1].sd
for s in accums[0].sd.keys():
    if not itsd.has_key(s):
        userv += 1
        
uurl = 0
itud = accums[1].ud
for s in accums[0].ud.keys():
    if not itud.has_key(s):
        uurl += 1
        
rep('%d servers %d urls not seen in trees' % (userv, uurl))
rep('%d single unlinked' % (single_unlinked))

do_pagestuff(servs['P'])
        
rep_rep()

## for fn in ofnms:
##     #tmpfile = os.tempnam('/tmp')
##     tmpfile = fn + '.sorted'
##     sortcmd = 'sort -n -o %s %s' % (tmpfile, fn) 
##     mvcmd = 'mv %s %s ' % (tmpfile, fn)

##     for cmd in [sortcmd, mvcmd]:
##         status, output = commands.getstatusoutput(cmd)
##         if status:
##             print cmd, 'failed with', output

