#! /usr/bin/env python
###############################################################################
#                                                                             #
#   Copyright 2005 University of Cambridge Computer Laboratory.               #
#                                                                             #
#   This file is part of Nprobe.                                              #
#                                                                             #
#   Nprobe is free software; you can redistribute it and/or modify            #
#   it under the terms of the GNU General Public License as published by      #
#   the Free Software Foundation; either version 2 of the License, or         #
#   (at your option) any later version.                                       #
#                                                                             #
#   Nprobe is distributed in the hope that it will be useful,                 #
#   but WITHOUT ANY WARRANTY; without even the implied warranty of            #
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the             #
#   GNU General Public License for more details.                              #
#                                                                             #
#   You should have received a copy of the GNU General Public License         #
#   along with Nprobe; if not, write to the Free Software                     #
#   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA #
#                                                                             #
###############################################################################


##############################################################################
## 
## 
## Check two directories for differing files, display differences
## 
##
## 

############################################################################ 
############################################################################ 

from string import *
from sys import argv
import getopt
import os
import sys
from stat import *
from os.path import basename, dirname, join, isfile, isdir, islink, realpath, \
     commonprefix, normpath

from sys import exit
import popen2
import re

from dirwalk import walk, resolve_path

##############################################################################
## 
## Turn off some tedious warnings
##
## 
############################################################################
from warnings import filterwarnings

# tempnam security risk
# os.tempnam is called by the get_tmpf helper inside do_diff; the filter below
# ignores warnings whose message matches this text.
filterwarnings('ignore', 'tempnam is a potential security risk to your program')            
##############################################################################

# Initial values given to dir1/dir2 in main() before the command line
# arguments are examined.
DEFAULT_FIRSTDIR = '/usr/groups/nprobe/jch1003/www/swig/grunge'
DEFAULT_SECDIR = '/usr/groups/nprobe/jch1003/www/swig_dev/grunge'

# Names rejected by unwanted_suff(): files whose last dot-separated component
# is one of UNWANTED_SUFFIXES, or whose final character is one of
# UNWANTED_TAILS.
UNWANTED_SUFFIXES = ['pyc', 'so', 'ps', 'o']
UNWANTED_TAILS = ['~', '#']

# External programs run by do_diff(): the diff binary and the pfind helper.
DIFF = '/usr/bin/diff'
PFIND = '/homes/jch1003/bin/python/pfind'
            
             
##############################################################################

def unwanted_suff(f):
    """Return 1 if file name f should be skipped, otherwise 0.

    A name is unwanted when its final character is one of UNWANTED_TAILS
    or when the text after its last '.' is one of UNWANTED_SUFFIXES.
    An empty name is never unwanted.
    """

    # An empty name has no final character - indexing it would raise
    # IndexError, so treat it as a wanted (uninteresting) name.
    if not f:
        return 0

    if f[-1] in UNWANTED_TAILS:
        return 1

    parts = f.split('.')
    # A name without any '.' has no suffix to test.
    if len(parts) > 1 and parts[-1] in UNWANTED_SUFFIXES:
        return 1
    return 0
            
             
##############################################################################

def lsdir(dir, ferr):

    present = {}

    try:
        for f in os.listdir(dir):
            file = join(dir, f)
            try:
                mode = os.stat(file)[ST_MODE]
            except OSError, s:
                print s
                ferr.append((file, 'stat', str(s)))
                continue
            if S_ISDIR(mode):
                continue
            if unwanted_suff(file):
                continue
            #print f
            present[f] = 1
    except OSError, s:
        ferr.append((dir, 'ls', s))

    return present
        
#############################################################################

def do_diff(f, dir1, dir2, debug, context):

    def detailed_report(diffcmd, o, e):

        print 'Running', diffcmd
        diffs = o.readlines()
        if diffs:
            raw_input('See it...')
            print 'Diffs'
            for l in diffs:
                print l.strip()
        else:
            print 'Same'
            
        errs =  e.readlines()
        if errs:
            print 'Error: '
            for err in errs:
                print err.strip()
        raw_input('Next...')

    def printlines(b):
        for l in b:
            print l.strip()

    def eprintlines(b):
        for l in b:
            eprint(l.strip())

    def escape_respecials(l):

        specials = '\.^$*+?{}[]()|'
        for c in specials:
            l = l.replace(c, '\%s' % (c))

        return l

    def get_tmpf():
        
        tmpfnm = os.tempnam('\tmp', 'dirrdif.')
        try:
            tmpf = open(tmpfnm, 'w')
            return (tmpf, tmpfnm)
        except IOError, s:
            sys.stderr.write('dirdiff: error opening temp file: %s\n' % (str(s)))
            sys.exit(1)

    def run_pfind(ccmd):
        
        #print 'running ', ccmd
        o, i, e = popen2.popen3(ccmd)
        cont = o.readlines()
        errs = e.readlines()
        printlines(cont)
        if errs:
            print 'Pfind produced the following errors:'
            printlines(errs)

        
    def do_pfind(which, files, diffs):

        file = files[which-1]
        symbol = ['<', '>'][which-1]
        lines = [l[1:-1] for l in diffs if l[0] == symbol]
        if not lines:
            print 'No differing lines in', file
            return
        tmpf, tmpfnm = get_tmpf()
        
        for l in lines:
            if len(l.rstrip()):
                tmpf.write('^' + '\s*' + escape_respecials(l.lstrip()) + '$\n')

        tmpf.close()

        ccmd = '%s -f%s %s' % (PFIND, tmpfnm, file)
        run_pfind(ccmd)

        os.unlink(tmpfnm) 

    def do_lpfind(which, files, diffs):

        file = files[which-1]
        goff = (which-1)*4
        #print 'goff', goff
        whichop = ['d', 'a'][which-1]
        #print whichop
        linesre = re.compile('^(\d+)(,(\d+))?([acd])(\d+)(,(\d+))?')
        tmpf, tmpfnm = get_tmpf()
        for l in diffs:
            m = linesre.match(l)
            if m:
                #print l
                gg = m.groups()
                #print gg
                op = gg[3]
                if op == 'c' or op == whichop:
                    if gg[goff]:
                        tmpf.write(gg[goff])
                        #print gg[goff],
                        if gg[goff+2]:
                            tmpf.write('-' + gg[goff+2])
                            #print '-' + gg[goff+2]
                        tmpf.write('\n')
                        #print
                    else:
                        print 'Failed to understand diff line \'%s\'' % (l)
        tmpf.close()

        ccmd = 'pfind -l%s %s' % (tmpfnm, file)
        run_pfind(ccmd)

        os.unlink(tmpfnm)

    def diffdiffs(diffs):

        diffd = {}
        diffdiffs = []

        for d in diffs:
            if d[0] == '>':
                diffd[d[1:]] = d
        for d in diffs:
            if d[0] == '<':
                if diffd.has_key(d[1:]):
                    diffd[d[1:]] = None
                else:
                    diffd[d[1:]] = d

        return [d for d in diffd.values() if d]
        
    #
    # Main Fn starts here
    #
    print 'Diffing %s' % (f),

    first = join(dir1, f)
    second = join(dir2, f)

    diffcmd = '%s -B \'%s\' \'%s\'' % (DIFF, first, second)
    o, i, e = popen2.popen3(diffcmd)

    if debug:
        detailed_report(diffcmd, o, e)
    else:
        diffs = o.readlines()
        errs =  e.readlines()
        
        if errs:
            print
            print 'Error running %s: ' % (diffcmd)
            printlines(errs)
            sys.exit(1)
        
        if diffs:
            print ' - differences exist'
            if context and f[-3:] == '.py':
                menustr = ' <SPC> to see diffs\n  1 to see context 1 - diff lines only\n 11 to see context 1 with matching lines\n111 to see context 1 with matching lines but common lines removed\n2/22/222/ for context 2\n  <RET> to continue\n..?'
                nulstr = '..?'
                askstr = menustr
                while 1:
                    ans = raw_input(askstr)
                    if ans == '':
                        break
                    elif ans == ' ':
                        printlines(diffs)
                        askstr = menustr
                    elif ans == '11' or ans == '22':
                        do_pfind(int(ans[-1]), [first, second], diffs)
                        askstr = menustr
                    elif ans == '111' or ans == '222':
                        do_pfind(int(ans[-1]), [first, second], diffdiffs(diffs))
                        askstr = menustr
                    elif ans == '1' or ans == '2':
                        do_lpfind(int(ans), [first, second], diffs)
                        askstr = menustr
                    else:
                        askstr = nulstr
                        continue
            elif context:
                while 1:
                    ans = raw_input('<SPC><RET> to continue <RET> to see diffs ..?')
                    if ans == '':
                        printlines(diffs)
                        break
                    elif ans == ' ':
                        break
            else:
                raw_input('<RET> to continue ..?')
                #printlines(diffs)
                          
        else:
            print ' - same'
        errs = []

    return
    
##############################################################################


def walk_check(tree_root, excluded):

    # shorten absolute paths by root dir element
    def rcp(fnm):
        return fnm.replace(tree_root, '$ROOT$')

    # field os errors during walk, but collect notification in errlist
    def ignore_OSError(s):
        pass

    exlist = []

    d1 = {}
    d2 = {}

    dlinks = []
    flinks = []

    #print 'tree_root', tree_root, rcp(tree_root)

##     firstel = tree_root.split('/')[0]
##     if firstel  == '.' or firstel == '..' or tree_root[0] != '/':
##         tree_root = normpath(join(os.getcwd(), tree_root))


    for root, dirs, files in walk(tree_root, onerror=ignore_OSError, exlist=exlist, excluded=excluded):
        #print 'root', root, rcp(root)

        for d in dirs:
            dir = join(root, d)
            if islink(dir):
                #print 'Linked dir', rcp(dir), '->', rcp(realpath(dir))
                dlinks.append((rcp(dir), rcp(realpath(dir))))
        for f in files:
            #print f
            if unwanted_suff(f):
                continue
            file = join(root, f)
            if islink(file):
                #print 'Linked file', file, '->', realpath(file)
                flinks.append((rcp(file), rcp(realpath(file))))
                continue
            d = d1.setdefault(f, {})
            d[root] = 1

    ft = d1.items()
    ft.sort()

    dups = []
    for f in ft:
        if len(f[1]) > 1:
            dirs = f[1].keys()
            dirs.sort()
            #print '\'%s\' exists in \'%s\' and' % (f[0], rcp(dirs[0]))
           ##  dups.append('\'%s\' in \'%s\' and' % (f[0], rcp(dirs[0])))
##             for d in dirs[1:]:
##                 #print '\t\'%s\'' % (rcp(d))
##                 dups.append('\t\'%s\'' % (rcp(d)))
            dups.append((f[0], dirs))

    if dups:
        print
        print 'Following possibly duplicated files found:'
        for d in dups:
            #print d
            print '\'%s\' in:' % (d[0])
            for dr in d[1]:
                print '\t%s' % (rcp(dr))
    else:
        print 'No duplicated files found'

    if dlinks:
        print
        print 'Following soft linked directories found:'
        for d in dlinks:
            print d[0], '->', d[1]

    if flinks:
        print
        print 'Following soft linked files found:'
        for d in flinks:
            print d[0], '->', d[1]

    if exlist:
        if exlist[0]:
            print '\nXXX The following errors occurred:'
            for err in exlist[0]:
                print err
        if exlist[1]:
            print '\nXXX The following directories were not visited multiple times:'
            for d in exlist[1]:
                print '\'%s\' -> \'%s\'' % (d[0], d[1])
        if exlist[2]:
            print '\nXXX The following dirs were excluded:'
            for d in exlist[2]:
                print d
    
##############################################################################

def eprint(s):
    """Write the string s to standard error, followed by a newline."""

    line = s + '\n'
    sys.stderr.write(line)

###############################################################################
 
def get_excluded(el):
    """Return a new list of the names in the comma-separated string el.

    The names are taken exactly as written: nothing is stripped and
    empty fields are kept.
    """

    return [name for name in el.split(',')]
    
############################################################################## 
##############################################################################

def usage(nm, msg=None):
    """Write the usage summary to stderr and exit with status 1.

    nm  -- program name shown in the text
    msg -- optional message (e.g. an option parsing error) written
           before the summary

    This function never returns.
    """

    if msg:
        eprint('%s: %s\n' % (nm, msg))

    eprint('%s: Compare files in two given directories' % (nm))
    eprint('Usage:')

    eprint('\t%s [flags] dir1 dir2' % (nm))
    # With -w main() expects exactly one directory argument
    eprint('\t%s -w [flags] dir' % (nm))
    eprint('Flags:')
    eprint('\t-f<file> Check only <file>')
    eprint('\t-c Provide context info for Python files')
    eprint('\t-d Debug - show each diff command with its raw output and errors')
    eprint('\t-w Walk the tree under dir reporting duplicated and soft linked files')
    eprint('\t-e<list> Comma-separated list of directories to exclude (used with -w)')
    eprint('\t-h This help')
    exit(1)
##############################################################################

def help_exclude():

    print 'Excluded directories are given as a comma-separated (no spaces) list of one or more names'
    print 'The names may be '
        
###############################################################################


def main():
    """Parse the command line and run the comparison or the tree walk.

    Without -w two directory arguments are required: the wanted plain
    files of each are listed, files present in only one directory are
    reported, and every file present in both is passed to do_diff().
    With -f only the named file is considered.  With -w a single
    directory argument is required and walk_check() is run on it.

    The user is asked to confirm before any checking starts.
    """

    debug = 0
    context = 0
    dowalk = 0
    helpme = 0
    dir1 = DEFAULT_FIRSTDIR
    dir2 = DEFAULT_SECDIR
    file = None
    ferr = []
    diffiles = []
    notinfirst = []
    notinsecond = []
    excluded = []

    scriptname = os.path.basename(argv[0])

    try:
        optlist, args = getopt.getopt(argv[1:], 'dhcf:we:')

    except getopt.error, s:
        # usage() reports the problem and exits
        usage(scriptname, msg=str(s))

    for opt in optlist:
        if opt[0] == '-h':
            # Counted so that a repeated -h can ask for the extra help
            helpme += 1
        if opt[0] == '-d':
            debug += 1
        if opt[0] == '-c':
            context = 1
        if opt[0] == '-w':
            dowalk = 1
        if opt[0] == '-f':
            file = basename(opt[1])
        if opt[0] == '-e':
            excluded = get_excluded(opt[1])

    if helpme:
        # usage() never returns, so the extra help must be printed first
        if helpme > 1:
            help_exclude()
        usage(scriptname)

    if len(args) == 2 and not dowalk:
        dir1 = resolve_path(args[0])
        dir2 = resolve_path(args[1])
    elif len(args) == 1 and dowalk:
        dir1 = resolve_path(args[0])
    else:
        usage(scriptname, msg='wrong number of arguments')

    # Insist on an explicit go-ahead; any other answer repeats the prompt
    while 1:
        print '%s: CAUTION - WILL ONLY CHECK VISIBLE FILES - ENSURE ALL ARE CHECKED OUT' % (scriptname)
        print '<RET> to continue, <SPC><RET> to abort'
        ans = raw_input('?')
        if ans == '':
            break
        elif ans == ' ':
            return

    if dowalk:
        walk_check(dir1, excluded)
        return

    firstdir = lsdir(dir1, ferr)
    secdir = lsdir(dir2, ferr)

    # With -f only look for the one file; otherwise note what is missing
    # from the second directory
    infirst = 0
    for f in firstdir:
        if file:
            if basename(f) == file:
                infirst = 1
                break
        else:
            if f not in secdir:
                notinsecond.append(f)

    # Likewise for the second directory, also collecting the files
    # present in both for diffing
    insecond = 0
    for f in secdir:
        if file:
            if basename(f) == file:
                insecond = 1
                break
        else:
            if f in firstdir:
                diffiles.append(f)
            else:
                notinfirst.append(f)

    if notinsecond:
        print 'Files in first dir but not in second:'
        for f in notinsecond:
            print f
        print

    if notinfirst:
        print 'Files in second dir but not in first:'
        for f in notinfirst:
            print f
        print

    if file:
        if infirst and insecond:
            diffiles.append(file)
        else:
            if not infirst:
                print 'File %s not found' % (join(dir1, file))
            if not insecond:
                print 'File %s not found' % (join(dir2, file))

    if ferr:
        print 'Errors encountered:'
        for f in ferr:
            # The last element of each entry is the error message
            print f[-1]

    diffiles.sort()
    for f in diffiles:
        do_diff(f, dir1, dir2, debug, context)
            
             
##############################################################################


# Call main when run as script; main() is not invoked when this file is
# imported as a module.
if __name__ == '__main__':
        main()

    
