#! /usr/bin/env python
###############################################################################
#                                                                             #
#   Copyright 2005 University of Cambridge Computer Laboratory.               #
#                                                                             #
#   This file is part of Nprobe.                                              #
#                                                                             #
#   Nprobe is free software; you can redistribute it and/or modify            #
#   it under the terms of the GNU General Public License as published by      #
#   the Free Software Foundation; either version 2 of the License, or         #
#   (at your option) any later version.                                       #
#                                                                             #
#   Nprobe is distributed in the hope that it will be useful,                 #
#   but WITHOUT ANY WARRANTY; without even the implied warranty of            #
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the             #
#   GNU General Public License for more details.                              #
#                                                                             #
#   You should have received a copy of the GNU General Public License         #
#   along with Nprobe; if not, write to the Free Software                     #
#   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA #
#                                                                             #
###############################################################################


from sys import argv
import getopt
import os
import sys
import re
import commands
from math import sqrt
import Numeric

import np_warnings

from np_treestats import NOB_THRESH

from np_TCPConn import D_BIGDEL

from nprobe import CT_TEXT_HTML, CT_TEXT_XML

# Number of buckets in each accumulator's servers-per-page histogram
# (accum.nservd): bucket i counts the pages whose record gave i servers.
N_SERV_BKTS = 1000

# Lines of the summary report, in the order they were queued.
replist = []

def rep(s):
    """Queue s as the next line of the summary report.

    Nothing is printed or written here; the queued lines are only
    collected in replist.
    """
    replist.extend((s,))

def rep_per(args):
    """Queue a headline count followed by its non-zero sub-counts.

    args is a sequence of (value, label) pairs.  The first pair is the
    headline; if its value is zero nothing at all is queued.  Otherwise
    the headline is queued as 'value label:' and every later pair with a
    non-zero value is queued on its own tab-indented line together with
    its size as a percentage of the headline value.
    """
    total, total_label = args[0]
    if total:
        rep('%d %s:' % (total, total_label))
        # Percentage points represented by one unit of the headline value.
        pcf = 100.0/total
        for count, label in args[1:]:
            if count:
                rep('\t%d %s (%.2f%%)' % (count, label, count*pcf))

def rep_rep():
    try:
        sf = open(sumfilenm, 'w')
    except IOError, s:
        print 'Couldn\'t open summary file', s
        sys.exit(1)
        
    for s in replist:
        print s
        sf.write(s + '\n')
    
               

# Names of the plain integer counters carried by every accum instance.
# They are zeroed in accum.__init__ and summed field by field in accum_tot.
acc_fields = ['totpages', 'nobs', 'nconns', 'npdel', 'npbdel', 'npbdel85',
              'no_del_del', 'sp', 'lp', 'unlinked', 'refr', 'inv', 'rvst']

class accum:
    """One set of page statistics and the code that reports it.

    Attributes:
      what    heading used for this set in the report
      <name>  one integer counter for each name in acc_fields
      ud, sd, cd
              dictionaries filled in by the caller; the script keys them
              by URL, server and client and stores a list of four counts
              under each key
      nservd  Numeric array of N_SERV_BKTS counts, indexed by the number
              of servers given for a page
      nurl, nserv, ncli, nsdel, nsbdel, nudel, nubdel
              totals derived from the dictionaries; the caller sets them
              before calling report().  They start at zero so report()
              can be called on a freshly made instance.
    """

    def __init__(self, what):
        self.what = what
        for f in acc_fields:
            setattr(self, f, 0)
        # Derived totals, normally overwritten by the caller before
        # report() is used.
        for f in ('nurl', 'nserv', 'ncli',
                  'nsdel', 'nsbdel', 'nudel', 'nubdel'):
            setattr(self, f, 0)
        self.ud = {}
        self.sd = {}
        self.cd = {}
        self.nservd = Numeric.zeros(N_SERV_BKTS,)

    def report(self):
        """Queue this set's statistics as report lines via rep()/rep_per().

        Reads the module-level nob_thresh for the small/large page labels.
        Sub-counts that are zero are left out by rep_per().
        """
        rep(self.what)
        rep('%d servers %d clients %d obs %d conns' \
              % (self.nserv, self.ncli, self.nobs, self.nconns))
        rep_per([(self.nurl, 'URLs')])
        page_per = [
            (self.totpages, 'Page downloads'),
            (self.sp, 'lt %d obs' % (nob_thresh)),
            (self.lp, 'gt %d obs' % (nob_thresh))
            ]
        single = self.unlinked+self.refr+self.inv+self.rvst
        if single:
            page_per += [
                (single, 'single objects not in trees'),
                (self.unlinked, 'unlinked'),
                (self.refr, 'refreshes'),
                (self.rvst, 'revisits'),
                (self.inv, 'invalid')
                ]
        if self.npdel:
            page_per += [
                (self.npdel, 'delayed'),
                (self.npbdel, 'long delayed'),
                # A single '%': this label is substituted through %s by
                # rep_per(), it is not itself used as a format string, so
                # a doubled '%%' would be printed literally.
                (self.npbdel85, 'long delayed at 85%')
                ]
        if self.no_del_del:
            page_per += [(self.no_del_del, 'delays not adding to total')]
        rep_per(page_per)
        rep_per([(self.nserv, 'Servers'),
                 (self.nsdel, 'delayed'),
                 (self.nsbdel, 'long delayed')
                 ])
        rep_per([(self.nurl, 'URLs'),
                 (self.nudel, 'delayed'),
                 (self.nubdel, 'long delayed')
                 ])
        rep('Distribution of servers/page')
        # Only the buckets that were actually hit are listed.
        for i in range(N_SERV_BKTS):
            ns = self.nservd[i]
            if ns:
                rep('  %d %12d' % (i, ns))
        rep('\n')


def _merge_counts(first, second):
    """Return a new dict holding the element-wise sum of two count tables.

    Both arguments map a key to a list of (at least) four counts.  The
    lists in the result are fresh copies, so neither input table is
    modified.
    """
    merged = {}
    for key, counts in first.items():
        # Copy the list: sharing it would let the additions below leak
        # back into the table it came from.
        merged[key] = list(counts)
    for key, counts in second.items():
        total = merged.setdefault(key, [0, 0, 0, 0])
        for i in range(4):
            total[i] += counts[i]
    return merged


def accum_tot(a, b, c):
    """Store in c the combined statistics of accumulators a and b.

    c.ud, c.sd and c.cd are replaced by new dictionaries holding, per
    key, the element-wise sum of the four counts found in a and b.
    c.nservd is overwritten bucket by bucket with the sum of the two
    histograms, and every counter named in acc_fields is set on c to the
    sum of the corresponding counters of a and b.

    a and b are left untouched.
    """
    c.ud = _merge_counts(a.ud, b.ud)
    c.sd = _merge_counts(a.sd, b.sd)
    c.cd = _merge_counts(a.cd, b.cd)

    for i in range(N_SERV_BKTS):
        c.nservd[i] = a.nservd[i] + b.nservd[i]

    for f in acc_fields:
        setattr(c, f, getattr(a, f) + getattr(b, f))
    


scriptname = os.path.basename(argv[0])
ofnm = None
nob_thresh = NOB_THRESH

try:
    optlist, args = getopt.getopt(sys.argv[1:], 'o:n:')

except getopt.error, s:
    print '%s: %s' % (scriptname, s)
    usage(scriptname)
    sys.exit(1)

for opt in optlist:
    if opt[0] == '-o':
        ofnm = opt[1]
    if opt[0] == '-n':
        nob_thresh = opt[1]

# Header line of an input file carrying the run start time; the captured
# digits are divided by 1000000.0 where the header is parsed.
start_re = re.compile('# Run start = ([0-9]*).*')
# Start time of the first run header seen; 0 means none seen yet.
start = 0

# Index 0 collects records that are not in trees, index 1 those that are,
# index 2 receives the sum of both (see accum_tot).
accums = [accum('NOT IN TREES:'), accum('IN_TREES:'), accum('TOTAL:')]

# All output files are created in the directory of the first input file.
basedir = os.path.dirname(args[0])

# Work out the output base name from the input file names.
# First form tried: '<x>.rep.<n>-<y>.rep.<m><.suffix>' -- both range ends
# and the suffix of every matching name are collected.
frange = []
suffs = []
pref_re = re.compile('(.\.rep\.\d*)-(.\.rep\.\d*)(\..*)')
print 'files from', os.getcwd(), ':'
rep('files from %s:' % (os.getcwd()))
for fnm in args:
    fnm = os.path.basename(fnm)
    print '   ', os.path.basename(fnm)
    m = pref_re.match(fnm)
    if m:
        #print m.group(1), m.group(2), m.group(3)
        frange.append(m.group(1))
        frange.append(m.group(2))
        suffs.append(m.group(3))

# NOTE(review): both branches below assign ofnm without checking whether
# -o already set it, so a name given with -o appears to be overwritten
# whenever an input name matches -- confirm whether that is intended.
if frange:
    # Warn about mixed suffixes, but carry on using the first one.
    for suff in suffs[1:]:
        if suff != suffs[0]:
            print 'Ouch mixed suffix:', suff
    # Plain string sort; the smallest and largest range ends name the output.
    frange.sort()
    ofnm = frange[0] + '-' + frange[-1] + suffs[0]
    #print frange

else:
    # Second form: a single '<x>.rep.<n>.<rest>' name.  fnm is still the
    # base name of the last input file from the loop above.
    pref_re = re.compile('(.\.rep\.\d*)\.(.*)') 
    m = pref_re.match(os.path.basename(fnm))
    if m: 
        #print m.group(1), m.group(2)
        ofnm = m.group(0)

# Third form: any name containing '.Pages'.
if not ofnm:
    pref_re = re.compile('.*\.Pages') 
    m = pref_re.match(os.path.basename(fnm))
    if m:
        ofnm = os.path.basename(fnm)

# Last resort: ask the user; an empty answer falls back to the base name
# of the last input file.
if not ofnm:
    ofnm = raw_input('output files base? - base is %s\n?' % (basedir))
    if not ofnm:
        ofnm = os.path.basename(fnm)

# Drop a single leading '-' from the base name.
if ofnm[0] == '-':
    ofnm = ofnm[1:]
    
outfbase = os.path.join(basedir, ofnm)
#print 'writing to'
rep('Output written to:')

# One entry per output set: [file name suffix, list of write methods].
# Set 0 receives every page, set 1 pages with at most nob_thresh objects,
# set 2 pages with more.  Within a set the write methods are indexed in
# the order of the suffixes listed in the inner loop below.
writes = [['', []], ['.lt%d' % (nob_thresh), []], ['.gt%d' % (nob_thresh), []]]
ofnms = []
for suff2, write in writes:
    for suff1 in ['.dur', '.dur85', '.ndur', '.ndur85', '.del', '.del85', '.acc_del', '.ddf']:
        outfnm = outfbase + suff1 + suff2
        ofnms.append(outfnm)
        try:
            f = open(outfnm, 'w')
        except IOError, s:
            print 'Couldn\'t open data file', s
            sys.exit(1)
        # Only the bound write method is kept; the file object itself is
        # not stored anywhere and is never closed explicitly.
        write.append(f.write)
        #print 'writing to', os.path.basename(outfnm)
        #print outfnm
        rep(outfnm)

sumfilenm = outfbase + '.Summary'
#print 'Summary file is', sumfilenm
rep('Summary file is %s' % (sumfilenm))

#sys.exit(0)
totp = 0
badl = 0
min_tm = 99999999999999999.9999
max_tm = 0
single_unlinked = 0
for fnm in args:
    print os.path.basename(fnm)
    f = open(fnm, 'r')
    lno = 0
    for l in f.readlines():
        l = l.replace('\n', '')
        if l[0] == '#':
            m = start_re.match(l)
            if m:
                strt = long(m.group(1))/1000000.0
                #print 'start = ', start
                if start == 0:
                    start = strt
                strt -= start
            print 'start is', start, 'offset is', strt
            continue
        sf = l.rstrip().split(' ')
        totp += 1
        intree = 1
        #print sf
        #url, cli, serv, by, no, nc, tm, dur, ndur, dur85, ndur85, dflags = sf
        le = len(sf)
        if le == 18:
            pagenum = int(sf[0])
            sf = sf[1:]
            how = ''
            url = sf[0]
            cli = sf[1]
            serv = sf[2]
            by = int(sf[3])
            no = int(sf[4])
            nc = int(sf[5])
            lnk = int(sf[6])
            ctype = lnk >> 16
            ltype = lnk & 0xFFFF
            nservs = int(sf[7])
            tm = float(sf[8]) + strt
            dur = int(sf[9])
            ndur = int(sf[10])
            dur85 = int(sf[11])
            ndur85 = int(sf[12])
            dflags = int(sf[13])
            acc_del = int(sf[14])
            delv = long(sf[15])
            cnt_del = int(sf[16])
        elif le == 10:
            intree = 0
            how = sf[0]
            url = sf[1]
            cli = sf[2]
            serv = sf[3]
            by = int(sf[4])
            ctype = int(sf[5])
            no = 1
            nc = 1
            tm = float(sf[6])/1000 + strt
            dur = int(sf[7])
            dur85 = dur
            ndur = int(sf[8])
            ndur85 = ndur
            dflags = int(sf[9])
            acc_del = cnt_del = dur - ndur
            delv = acc_del*acc_del
            nservs = 1

        ##     if ctype == CT_TEXT_HTML or ctype == CT_TEXT_XML:
##                 #single object page
##                 single_unlinked += 1
##                 intree = 1
##                 ltype = 0
            
        else:
            print 'pagesum - bad line %s %d: %s %d fields' % (fnm, lno, l, le)
            badl += 1
            continue

        if ndur == 0:
            continue
        
        #print how, tm
        min_tm = min(tm, min_tm)
        max_tm = max(tm, max_tm)
        
        acc = accums[intree]
        acc.totpages += 1
        ue = acc.ud.setdefault(url, [0,0,0,0])
        se = acc.sd.setdefault(serv, [0,0,0,0])
        ce = acc.cd.setdefault(cli, [0,0,0,0])

        ue[0] += 1
        se[0] +=1
        ce[0] += 1
        
        dl = dur - ndur
        dl85 = dur85 - ndur85
        if dl:
            acc.npbdel += 1
            ue[1] += 1
            se[1] += 1
        if dl85:
            acc.npbdel85 += 1
            ue[2] += 1
            se[2] += 1
        if dflags:
            acc.npdel += 1
            ue[3] += 1
            se[3] += 1

        acc.nobs += no
        acc.nconns += nc
        #print totdur, durlessdel, tot85dur, dur85lessdel

        if not intree:
            if no != 1:
                print 'Single object goof %d objects %s %d: %s' \
                      % (no, fnm, lno, l)
            if how == 'U':
                acc.unlinked += 1
            elif how == 'R':
                acc.refr += 1
            elif how == 'I':
                acc.inv += 1
            elif how == 'V':
                acc.rvst += 1
            else:
                print 'Invalid non-tree reason %s %d: %s' % (fnm, lno, l)
                sys.exit(1)

        write = writes[0][1]
        write[0]('%.3f\t%d\n' % (tm, dur))
        write[1]('%.3f\t%d\n' % (tm, dur85))
        write[2]('%.3f\t%d\n' % (tm, ndur))
        write[3]('%.3f\t%d\n' % (tm, ndur85))
        if dl:
            write[4]('%.3f\t%d\n' % (tm, dl))
        if acc_del and dur:
            write[6]('%.3f\t%.2f\n' % (tm, ((acc_del/no)*100.0)/ndur))
            write[7]('%.3f\t%.2f\n' % (    tm, (sqrt(delv/no))/ndur)   )
        if dl85:
            write[5]('%.3f\t%d\n' % (tm, dur85-ndur85))

        if no > nob_thresh:
            write = writes[2][1]
            acc.lp += 1
        else:
            write = writes[1][1]
            acc.sp += 1
        write[0]('%.3f\t%d\n' % (tm, dur))
        write[1]('%.3f\t%d\n' % (tm, dur85))
        write[2]('%.3f\t%d\n' % (tm, ndur))
        write[3]('%.3f\t%d\n' % (tm, ndur85))
        if dl:
            write[4]('%.3f\t%d\n' % (tm, dur-ndur))
        if acc_del and dur:
            write[6]('%.3f\t%.2f\n' % (tm, ((acc_del/no)*100.0)/ndur))
            write[7]('%.3f\t%.2f\n' % (tm, (sqrt(delv/no))/ndur))
        if dl85:
            write[5]('%.3f\t%d\n' % (tm, dur85-ndur85))
            
        if acc_del and not (dur-ndur):
            acc.no_del_del += 1

        acc.nservd[nservs] += 1
        

        lno += 1 

if not totp:
    print 'No pages in page file(s)'
    sys.exit(1)

print totp, 'pages'
print '%d/%d bad lines' % (badl, totp)
print 'times:', min_tm, max_tm

    
accum_tot(accums[0], accums[1], accums[2])

for i in [1, 0, 2]:
    acc = accums[i]

    acc.nurl = len(acc.ud)
    acc.nserv = len(acc.sd)
    acc.ncli = len(acc.cd)

    acc.nsdel = 0
    acc.nsbdel = 0
    for s in acc.sd.values():
        if s[1]:
            acc.nsbdel += 1
        if s[3]:
            acc.nsdel += 1

    acc.nudel = 0
    acc.nubdel = 0
    for u in acc.ud.values():
        if u[1]:
            acc.nubdel += 1
        if u[3]:
            acc.nudel += 1
 
    acc.report()

userv = 0
itsd = accums[1].sd
for s in accums[0].sd.keys():
    if not itsd.has_key(s):
        userv += 1
        
uurl = 0
itud = accums[1].ud
for s in accums[0].ud.keys():
    if not itud.has_key(s):
        uurl += 1
        
rep('%d servers %d urls not seen in trees' % (userv, uurl))
rep('%d single unlinked' % (single_unlinked))
        
rep_rep()

## for fn in ofnms:
##     #tmpfile = os.tempnam('/tmp')
##     tmpfile = fn + '.sorted'
##     sortcmd = 'sort -n -o %s %s' % (tmpfile, fn) 
##     mvcmd = 'mv %s %s ' % (tmpfile, fn)

##     for cmd in [sortcmd, mvcmd]:
##         status, output = commands.getstatusoutput(cmd)
##         if status:
##             print cmd, 'failed with', output
