# Read a ".net" file and construct a dictionary representing
# the social network. From the dictionary build a set of traces
# as well as a reference file containing the conversation
# patterns.

try:
    import psyco
    psyco.full()
except ImportError:
    pass

import sys
import hashlib
import hmac
from random import uniform, shuffle, seed
import numpy.random

def shash(message, purpose, aKEY, length=8):
    """Return a truncated keyed hash used to anonymize an identifier.

    The value is the HMAC-SHA256, under the key ``aKEY``, of ``purpose``
    followed by a NUL byte and ``message``.  ``purpose`` separates hash
    domains, so the same message hashed for two purposes yields unrelated
    values.

    message, purpose, aKEY -- byte strings or text; text is encoded as
                              UTF-8 before hashing.
    length                 -- number of leading hex digits to return
                              (a SHA-256 hex digest has 64).
    """
    def _as_bytes(value):
        # HMAC works on bytes.  Byte strings (the plain ``str`` of
        # Python 2) pass through untouched, so existing output is unchanged.
        if isinstance(value, bytes):
            return value
        return value.encode('utf-8')

    payload = _as_bytes(purpose) + b'\0' + _as_bytes(message)
    h = hmac.new(_as_bytes(aKEY), payload, hashlib.sha256)
    return h.hexdigest()[:length]

# Command line: five file names, then the anonymization key and the seed.
if len(sys.argv) != 8:
    # A string argument makes sys.exit print it on stderr and terminate
    # with a non-zero status, so callers can detect the usage error.
    sys.exit("Usage: makeTraces INFILE.net OUTFILE.soc OUTFILE.dot OUTFILE.msg OUTFILE.trc KEY SEED")

KEY = sys.argv[6]        # HMAC key for the anonymized message IDs
SEED = int(sys.argv[7])  # raises ValueError if SEED is not an integer

# Seed both generators so that a run is reproducible: the standard
# library one (uniform, shuffle) and numpy's global one.
seed(SEED)
# numpy only accepts seeds in [0, 2**32 - 1]; the modulo keeps SEED - 1
# inside that range (SEED = 0 used to crash here) and leaves the value
# untouched for every seed that was already valid.
numpy.random.seed((SEED - 1) % 2 ** 32)

# Read the network: a comma separated edge list, one "USER1,USER2" pair
# per line, preceded by a fixed header line.
FILE = sys.argv[1]
fin = open(FILE, 'r')

# Skip and validate the header.  This is an explicit check rather than an
# assert: under "python -O" an assert is removed together with the
# readline() inside it, and the header would then be parsed as an edge.
header = fin.readline()
if header.rstrip("\r\n") != "USER1,USER2":
    fin.close()
    raise ValueError("%s: expected header 'USER1,USER2', got %r" % (FILE, header))

fsoc = open(sys.argv[2], 'w')
fdot = open(sys.argv[3], 'w')

# Make a .dot file as we go along
fdot.write('graph friends\n{\ngraph[ratio=fill,size="50,50"];\n')

Net = {}     # user id -> set of the ids of that user's contacts
AllRel = []  # every (user1, user2) pair, in file order
for line in fin:
    try:
        # Strip only the line terminator: cutting the last character
        # blindly would truncate the second id on a final line that has
        # no trailing newline.
        aid1, aid2 = line.rstrip("\r\n").split(",")
    except ValueError:
        # Wrong number of fields on the line.
        print("PARSE ERROR")
        raise
    AllRel.append((aid1, aid2))
    # Add the users to the network if necessary, then connect them in
    # both directions.
    Net.setdefault(aid1, set()).add(aid2)
    Net.setdefault(aid2, set()).add(aid1)
    if aid1 > aid2:
        # Add the link to the .dot file; only pairs listed with the
        # larger id first are drawn.
        fdot.write('"%s" -- "%s";\n' % (aid1, aid2))
fin.close()
fdot.write('}\n')
fdot.close()

# Report every user that has exactly 20 contacts, together with the
# degree of each of those contacts.
Targets = [uid for uid in Net if len(Net[uid]) == 20]
for t in Targets:
    friends = ["%s (%s)" % (fid, len(Net[fid])) for fid in Net[t]]
    print("Target: %s (Deg: %s)" % (t, len(Net[t])))
    print("Friends: %s" % ', '.join(friends))
    print("")

#import sys
#sys.exit()

# Tag each relationship with a sorted list of labels:
#   'w' work, 'f' friend, 'c' close.
# Every pair carries 'w' and/or 'f'; 'c' is drawn independently on top.
TypeRel = {}    # (user1, user2) -> label list, stored for both directions
p_work = 0.60   # probability that a relationship is a work one
p_both = 0.05   # probability that a work relationship is also a friendship
p_close = 0.1   # probability that a relationship is close

# Label each relationship and log it to the .soc file
for (a, b) in AllRel:
    if (a, b) in TypeRel:
        # Already labelled, either directly or through the reverse pair.
        # Log the labels stored for THIS pair, not whatever list was left
        # over from the previous iteration.
        fsoc.write("%s,%s,%s\n" % (a, b, '|'.join(TypeRel[(a, b)])))
        continue
    L = []
    # Label work and friends
    if uniform(0, 1) < p_work:
        L += ['w']
        if uniform(0, 1) < p_both:
            L += ['f']
    else:
        L += ['f']
    # Label close relations
    if uniform(0, 1) < p_close:
        L += ['c']
    L.sort()
    # The relationship is symmetric: both directions share one label list.
    TypeRel[(a, b)] = L
    TypeRel[(b, a)] = L

    # LOG relations
    fsoc.write("%s,%s,%s\n" % (a, b, '|'.join(L)))

# Close the .soc file
fsoc.close()

# Activity level for every hour of the day, per kind of day and per kind
# of relationship.  Each entry is
#   (day kind, hours covered, relationship label, rate level)
# with day kind 'n' for a weekday and 'v' for a weekend or vacation day,
# relationship label 'w' (work) or 'f' (friend), and rate level one of
# 'H', 'M', 'L', 'VL'.
TZ = [
    # Weekdays
    ('n', range(0, 2), 'w', 'L'),
    ('n', range(0, 2), 'f', 'L'),
    ('n', range(2, 8), 'w', 'VL'),
    ('n', range(2, 8), 'f', 'VL'),
    ('n', range(8, 10), 'w', 'L'),
    ('n', range(8, 10), 'f', 'VL'),
    ('n', range(10, 18), 'w', 'H'),
    ('n', range(10, 18), 'f', 'L'),
    ('n', range(18, 22), 'w', 'L'),
    ('n', range(18, 22), 'f', 'H'),
    ('n', range(22, 24), 'w', 'L'),
    ('n', range(22, 24), 'f', 'M'),
    # Weekends and vacations
    ('v', range(0, 2), 'w', 'VL'),
    ('v', range(0, 2), 'f', 'L'),
    ('v', range(2, 8), 'w', 'VL'),
    ('v', range(2, 8), 'f', 'VL'),
    ('v', range(8, 10), 'w', 'VL'),
    ('v', range(8, 10), 'f', 'VL'),
    ('v', range(10, 18), 'w', 'L'),
    ('v', range(10, 18), 'f', 'H'),
    ('v', range(18, 22), 'w', 'L'),
    ('v', range(18, 22), 'f', 'H'),
    ('v', range(22, 24), 'w', 'VL'),
    ('v', range(22, 24), 'f', 'M'),
]

# Expand the table: Rates[(day kind, relationship label)] is a list of 24
# rate levels indexed by hour of the day.
Rates = {}
for (day_kind, hour_span, rel_kind, level) in TZ:
    per_hour = Rates.setdefault((day_kind, rel_kind), [None] * 24)
    for slot in hour_span:
        per_hour[slot] = level

import scipy.stats

# Poisson parameter (expected messages per hour) for each rate level
RatesLambda = {
    'H': 0.01,
    'M': 0.005,
    'L': 0.0025,
    'VL': 0.0005,
}
# Parameter of the geometric draw made per message, for each rate level
RatesReply = {
    'H': 0.35,
    'M': 0.35,
    'L': 0.6,
    'VL': 0.6,
}

# The simulated period: 13 weeks of five 'n' days followed by two 'v' days
TimeLine = 13 * (5 * ['n'] + 2 * ['v'])

# Short names for the two random samplers
geom = scipy.stats.distributions.geom.rvs
poisson = scipy.stats.distributions.poisson.rvs

convid = 1
EVENTS = []
# Gather events: for every relationship, every day of the time line and
# every hour, draw how many conversations a starts with b.  Each one is
# recorded as the tuple
#   (conversation id, sender, receiver, replies left, day index, time)
# where time is a fractional hour of the day.
for (j, (a, b)) in enumerate(AllRel):
    Labels = TypeRel[(a, b)]
    # Progress report
    print("%s %s %s %s" % (j, len(AllRel), Labels, len(EVENTS)))
    # Both values depend only on the relationship, so they are computed
    # once here instead of once per simulated hour.
    Labels2 = set(Labels) - set(['c'])
    is_close = 'c' in Labels
    for i, day in enumerate(TimeLine):
        for hour in range(24):
            # Is a message sent this hour?
            mass = 0
            for l in Labels2:
                actual_rate = RatesLambda[Rates[day, l][hour]]  # Poisson distribution
                if is_close:  # close people talk more often
                    actual_rate *= 2
                mass += poisson(actual_rate)  # number of messages to send from a -> b

            for mid in range(mass):
                # Should there be any replies?
                # NOTE(review): `l` is whichever label the loop above
                # visited last, so for a relationship that is both 'w'
                # and 'f' the reply rate follows the set's iteration
                # order.  Kept as is to leave the generated data unchanged.
                reply_rate = RatesReply[Rates[day, l][hour]]  # Geometric distribution
                rep = geom(reply_rate)
                convid += 1
                # geom() is at least 1, so rep - 1 is the number of
                # replies still to be exchanged (possibly none).
                EVENTS.append((convid,
                               a,
                               b,
                               rep - 1,
                               i,
                               uniform(hour, hour + 1)))

# Positions of the fields inside an event tuple
iCONVID, iSENDER, iRECV, iREP, iDAY, iHOUR = 0, 1, 2, 3, 4, 5

def getID(e, typex='IN'):
    """Return the 20 hex digit anonymized identifier of event ``e``.

    The identifier is a keyed hash (under the global KEY) of the event's
    sender, receiver, day, time and conversation id, joined by NUL
    characters.  ``typex`` is passed to shash() as the purpose, so the
    same event gets a different identifier for each purpose.
    """
    fields = [
        e[iSENDER],
        e[iRECV],
        str(e[iDAY]),
        str(e[iHOUR]),
        str(e[iCONVID]),
    ]
    return shash("\0".join(fields), typex, KEY, 20)

def reply_schedule(curTime, curDay, event):
    """Pick when the receiver of ``event`` replies and build that reply.

    curTime, curDay -- time of day and day index before which the reply
                       must not be scheduled.
    event           -- event tuple with at least one reply left
                       (``event[iREP] > 0``).

    Returns ``(key, reply)``: ``key`` is ``(day, time, conversation id)``
    and ``reply`` is the event tuple of the reply, with sender and
    receiver swapped and one reply fewer left.
    """
    assert event[iREP] > 0
    labels = TypeRel[(event[iSENDER], event[iRECV])]
    # Close contacts get the larger parameter for the day-delay draw.
    delay_rate = 0.50 if 'c' in labels else 0.25
    # Hourly activity follows the work profile whenever the relationship
    # has a work label, the friend profile otherwise.
    rel_kind = 'w' if 'w' in labels else 'f'

    not_before = (curDay, curTime)
    cand = (0, 0)
    # Respect causality: keep drawing until the candidate is not earlier
    # than the time the message was received.
    while cand < not_before:
        delay_days = geom(delay_rate) - 1
        reply_day = curDay + delay_days
        # Only the position within the week selects the day kind.
        levels = Rates[TimeLine[reply_day % 7], rel_kind]
        # One Poisson draw per hour; the uniform draw in second position
        # breaks ties between hours with the same count.
        draws = []
        for hour in range(24):
            draws.append((poisson(RatesLambda[levels[hour]]), uniform(0, 1), hour))
        # Select the highest one
        maxFlow, _, maxHour = max(draws)
        if maxFlow > 0:
            cand = (reply_day, uniform(maxHour, maxHour + 1))
        else:
            # No activity at all on that day: draw again.
            cand = (0, 0)

    # Build the new event to inject
    reply_day, reply_time = cand
    e = (event[iCONVID], event[iRECV], event[iSENDER], event[iREP] - 1,
         reply_day, reply_time)
    return (reply_day, reply_time, event[iCONVID]), e


from heapq import heappop,heappush,heapify

# All conversation events are recorded, so now it is time to push them
# through a Threshold Mix: messages accumulate until BATCH_SIZE of them
# are waiting, then the whole batch is shuffled and delivered at once.
# Events are indexed by (day, time, conversation id); the same keys are
# the entries of the priority queue.
EVENTS_DICT = dict(((e[iDAY], e[iHOUR], e[iCONVID]), e) for e in EVENTS)

# Files to write events (.msg) and traces (.trc)
fmsg = open(sys.argv[4], 'w')
ftrc = open(sys.argv[5], 'w')

# Priority queue of pending event keys, earliest first
Time = sorted(EVENTS_DICT)
heapify(Time)

# Place where we store the messages to be replied to
Reply_Queues = {}
BATCH_SIZE = 50
batch = []
MIX_ROUND = 0
ftrc.write("ID,SENDER,RECEIVER,ROUND,DAY,TIME\n")
fmsg.write(",".join(['ID1', 'ID2', 'CONVID', 'SENDER', 'RECEIVER',
                     'REPLYSEQ', 'DAYIN', 'TIMEIN', 'DAYOUT', 'TIMEOUT']) + "\n")

# Process one event at a time, until the queue is empty or the earliest
# pending event lies beyond the simulated period.  Messages still waiting
# in an incomplete batch at that point are never flushed.
while Time and Time[0][0] < len(TimeLine):
    t = heappop(Time)
    batch.append(t)

    # Log the message entering the mix
    ec = EVENTS_DICT[t]
    ftrc.write("%s,%s,%s,%s,%s,%s\n" % (getID(ec), ec[iSENDER], 'MIX',
                                         MIX_ROUND, ec[iDAY], ec[iHOUR]))

    if len(batch) != BATCH_SIZE:
        continue

    # Threshold reached: mix and flush everything.  All messages of the
    # batch leave at the arrival time of the message that filled it.
    flush_day, flush_time = t[0], t[1]
    shuffle(batch)
    print("Flush %s,%s" % (flush_day, flush_time))
    print('*' * (len(Time) // 10))
    print("Events left: %s" % len(Time))
    for key in batch:
        # Schedule a reply for every message that still expects one
        if EVENTS_DICT[key][iREP] > 0:
            ek, ev = reply_schedule(flush_time, flush_day, EVENTS_DICT[key])
            EVENTS_DICT[ek] = ev
            heappush(Time, ek)

        ec = EVENTS_DICT[key]
        id_in = getID(ec)
        id_out = getID(ec, 'OUT')
        # Log the message leaving the mix
        ftrc.write("%s,%s,%s,%s,%s,%s\n" % (id_out, 'MIX', ec[iRECV],
                                             MIX_ROUND, flush_day, flush_time))
        # Also log the full event, linking input and output identifiers
        fmsg.write("%s,%s,%s,%s,%s,%s,%s,%s,%s,%s\n" % (
            id_in, id_out, ec[iCONVID], ec[iSENDER], ec[iRECV],
            ec[iREP], ec[iDAY], ec[iHOUR], flush_day, flush_time))

    print("Events left: %s" % len(Time))
    # Start the next round with an empty batch
    batch = []
    MIX_ROUND += 1

ftrc.close()
fmsg.close()
