import sys

# The script needs exactly three arguments after its own name.
if len(sys.argv) != 4:
    # sys.exit() with a string writes it to stderr and exits with status 1,
    # so a calling shell or pipeline can tell that the invocation failed
    # (a bare sys.exit() reports success).
    sys.exit("Usage: SDA.py INFILE.trc OUTFILE.res TARGET")

# Command-line arguments: path of the trace to read, path of the results
# file to write, and the name of the sender under observation ("Alice").
INFILE = sys.argv[1]
OUTFILE = sys.argv[2]
TARGET = sys.argv[3]

# Positions of the fields inside one comma-separated trace record; the
# constants are named after the field they select.
(iMSGID, iSEND, iRECV, iBATCH, iDAY, iHOUR) = range(6)

# Read and parse the full file into EVENTS, a list of tuples laid out as
# (msgid, sender, receiver, int batch, int day, float hour).
EVENTS = []
# `with` closes the file even when a record fails to parse.
with open(INFILE, 'r') as data:
    for line in data:
        # Remove only the line terminator: slicing off the last character
        # would corrupt a final record that has no trailing newline.
        line = line.rstrip('\r\n')
        if not line:
            # tolerate blank lines (typically at the end of the file)
            continue
        e = line.split(',')
        if e[0] == "ID":
            # ignore the column titles
            continue
        # Otherwise parse and store the data
        EVENTS.append((e[iMSGID], e[iSEND], e[iRECV],
                       int(e[iBATCH]), int(e[iDAY]), float(e[iHOUR])))

# The set of rounds (batch numbers) in which the target appears as the
# sender of at least one event.
AliceRounds = set(ev[iBATCH] for ev in EVENTS if ev[iSEND] == TARGET)

# Count the deliveries made by the mix to each receiver, split by whether
# the delivery belongs to one of Alice's sending rounds:
#   RecVolumeGivenAlice / TotVolumeGivenAlice -- rounds in AliceRounds
#   RecVolume / TotVolume                     -- every other round
RecVolume = {}
RecVolumeGivenAlice = {}
TotVolume = 0
TotVolumeGivenAlice = 0

for ev in EVENTS:
    if ev[iSEND] != 'MIX':
        # only events sent by the mix are deliveries
        continue

    receiver = ev[iRECV]
    # Every receiver gets an entry in both tables, even when one of its
    # two counts stays at zero.
    RecVolumeGivenAlice.setdefault(receiver, 0)
    RecVolume.setdefault(receiver, 0)

    if ev[iBATCH] in AliceRounds:
        # delivery in a round where Alice was sending
        RecVolumeGivenAlice[receiver] += 1
        TotVolumeGivenAlice += 1
    else:
        # delivery in a round without Alice
        RecVolume[receiver] += 1
        TotVolume += 1

# Now try to find receivers that are more likely given Alice sending.
# Each receiver is scored as
#   (share of deliveries in Alice's rounds)
#       - (49/50) * (share of deliveries in the other rounds)
# NOTE(review): 49.0 / 50 looks like (b - 1) / b for a batch size of 50 --
# confirm against the trace generator.
RecLikelihood = []
for rec in RecVolumeGivenAlice:
    # A total of zero means every per-receiver count in that table is zero
    # as well (the target never sent, or sent in every round).  Treat the
    # share as 0.0 instead of dividing by zero.
    if TotVolumeGivenAlice:
        given_alice = float(RecVolumeGivenAlice[rec]) / TotVolumeGivenAlice
    else:
        given_alice = 0.0
    if TotVolume:
        background = float(RecVolume[rec]) / TotVolume
    else:
        background = 0.0
    RecLikelihood.append((given_alice - (49.0 / 50) * background, rec))

# Highest score first; ties on the score fall back to the receiver name.
RecLikelihood.sort(reverse=True)

# Map every receiver to its score and print the ranking: the natural log of
# the score for positively-scored receivers among the first 100 ranks, with
# a header line in front of every tenth rank.
import math
Probs = {}
for rank, (score, receiver) in enumerate(RecLikelihood):
    Probs[receiver] = score
    if rank >= 100 or not score > 0:
        # recorded in Probs, but not printed
        continue
    if rank % 10 == 0:
        print("Receivers %s-%s" % (rank + 1, rank + 10))
    print("%s\t -- %s" % (math.log(score), receiver))

# Group the events of Alice's rounds by round number:
#   AliceSendEvents[round] -- the events sent by the target in that round
#   AliceRecEvents[round]  -- the events delivered by the mix in that round
AliceSendEvents = {}
AliceRecEvents = {}
for ev in EVENTS:
    rnd = ev[iBATCH]
    if rnd not in AliceRounds:
        continue

    # Both tables get an entry for the round as soon as any of its events
    # is seen, whichever kind of event that is.
    sent_in_round = AliceSendEvents.setdefault(rnd, [])
    delivered_in_round = AliceRecEvents.setdefault(rnd, [])

    if ev[iSEND] == TARGET:
        sent_in_round.append(ev)
    if ev[iSEND] == 'MIX':
        delivered_in_round.append(ev)

# For each of Alice's rounds get the most likely receivers and write one
# "sent message id,delivered message id" line per message Alice sent.
# `with` flushes and closes the results file even if a lookup below raises.
with open(OUTFILE, 'w') as fres:
    for ri in AliceRounds:
        NumberOfSent = len(AliceSendEvents[ri])
        # Rank the round's deliveries by the score of their receiver,
        # best first.
        Scores = sorted(((Probs[e[iRECV]], e) for e in AliceRecEvents[ri]),
                        reverse=True)
        # Pair each message Alice sent with one of the top-ranked
        # deliveries; zip() stops early if the round has fewer deliveries
        # than sent messages.
        for (ein, (_, eout)) in zip(AliceSendEvents[ri], Scores[:NumberOfSent]):
            fres.write("%s,%s\n" % (ein[iMSGID], eout[iMSGID]))
