# -*- coding: cp1252 -*-
# The aim of this script is to parse the Facebook JSON response
# to an FB query of the form:
#
# select uid1,uid2
# from friend
# where uid1 in (select uid from group_member where gid=<gid>)
#   and uid2 in (select uid from group_member where gid=<gid>)
#
# The network needs to be anonymized

import sys
import hmac
import hashlib
import re

def shash(message, purpose, aKEY, length=8):
    """Return a short keyed hash used as an anonymous stand-in for an ID.

    The value is the hex HMAC-SHA256 of ``purpose + NUL + message`` under
    the key ``aKEY``, truncated to its first ``length`` hex characters.

    message -- the private identifier to anonymize
    purpose -- a label mixed into the hash so the same ID hashed for
               different purposes yields unrelated values
    aKEY    -- the secret key
    length  -- number of hex characters to keep (default 8, i.e. 32 bits
               of the digest; short values can collide on large inputs)

    Each of message, purpose and aKEY may be a byte string or text; text
    is encoded as UTF-8, so ASCII input gives the same hash either way.
    """
    def as_bytes(value):
        # HMAC works on bytes.  Byte strings pass through untouched so
        # existing callers get the same digest as before; text is encoded.
        if isinstance(value, (bytes, bytearray)):
            return bytes(value)
        return value.encode('utf-8')

    # The NUL separator keeps (purpose, message) pairs unambiguous.
    payload = as_bytes(purpose) + b'\0' + as_bytes(message)
    h = hmac.new(as_bytes(aKEY), payload, hashlib.sha256)
    return h.hexdigest()[:length]

# Matches one friendship record of the raw response, capturing both IDs.
FRIEND_PAIR_RE = re.compile(r'\{"uid1":"(\d+)","uid2":"(\d+)"\}')


def parse_friend_pairs(data):
    """Return the list of (uid1, uid2) string pairs found in the text data.

    Records are located with FRIEND_PAIR_RE; text that does not match the
    pattern is ignored, so an empty or unrelated input yields [].
    """
    return FRIEND_PAIR_RE.findall(data)


def format_network(pairs):
    """Render ID pairs as CSV text: a header row, then the sorted pairs.

    Lines are joined with newlines and there is no trailing newline.
    """
    lines = ["USER1,USER2"]
    lines.extend('%s,%s' % (aid1, aid2) for aid1, aid2 in sorted(pairs))
    return "\n".join(lines)


def main(argv):
    """Anonymize the friend network in argv[1] and write it to argv[2].

    argv is expected to be [program, INFILE, OUTFILE, KEY].  With any other
    number of entries the usage text is written to stderr and the process
    exits with a non-zero status, so callers can detect the failure.
    """
    if len(argv) != 4:
        sys.exit("Usage: ParseFBdata INFILE.txt OUTFILE.net KEY")

    # NOTE(review): the key arrives on the command line, where it may be
    # visible in shell history or process listings -- confirm acceptable.
    infile, outfile, key = argv[1], argv[2], argv[3]

    # Get the data
    with open(infile, 'r') as fin:
        data = fin.read()

    friends = parse_friend_pairs(data)
    anonymized = [(shash(uid1, "FRIEND", key), shash(uid2, "FRIEND", key))
                  for uid1, uid2 in friends]
    del data, friends, key  # Never handle private data again

    # Write the anonymized network to a file as CSV
    with open(outfile, 'w') as fout:
        fout.write(format_network(anonymized))


if __name__ == "__main__":
    main(sys.argv)
