#!/usr/bin/env ruby

$:.unshift File.dirname(__FILE__)
$:.unshift File.join(File.dirname(__FILE__), 'lib')
$:.unshift File.join(File.dirname(__FILE__), 'third_parties')

require 'optparse'
require 'gtvs'

# Command-line handling: the GTVS::Options instance registers its own
# switches on the parser, and -?/--help prints the usage text and exits.
gtvsOpts = GTVS::Options.new

opts = OptionParser.new do |parser|
    parser.banner = "Usage: verify-with-heuristics [options] <trace name> <heuristics script>"
    parser.separator ""
    parser.separator "Specific options:"

    gtvsOpts.set_options(parser)

    parser.separator ""
    parser.separator "Common options:"
    parser.on_tail("-?", "--help", "Show this message") do
        puts parser
        exit
    end
end
# parse! strips the recognised switches from ARGV, leaving the positionals.
opts.parse!(ARGV)

# First positional argument: the trace name.
if ARGV.empty?
    $stderr.puts 'Missing trace name (try --help)'
    exit 1
end
trace = ARGV.shift

# Second positional argument: path of the heuristics script to load.
if ARGV.empty?
    $stderr.puts 'Missing heuristics script (try --help)'
    exit 1
end
hscript = ARGV.shift

# File.exist? is used instead of the deprecated File.exists? alias, which
# was removed in Ruby 3.2 and raises NoMethodError there.
unless File.exist?(hscript)
    $stderr.puts "Non existing heuristics script #{hscript}"
    exit 1
end

# Globals shared with the heuristic helpers defined below: they run their SQL
# through $db and ask $gtvs for the names interpolated into their FROM
# clauses (and, in do_heuristic1, for per-mark flow counts).
$db = GTVS::Connection.get(gtvsOpts)
$gtvs = GTVS::GTVS.new($db, trace, gtvsOpts)

# Tells whether every mark in +actual+ also appears in +allowed+.
#
# actual::  Array of marks to check.
# allowed:: Array of acceptable marks.
#
# Returns true when removing the allowed marks from +actual+ leaves nothing
# (so an empty +actual+ is always accepted), false otherwise.
def check_l7Marks(actual, allowed)
    unexpected = actual - allowed
    unexpected.empty?
end

# Yields the aggregate id "<DstIp>_<DstPort>" of every row of
# $gtvs.flowsAggByDstPortIp with GtVerified = 0 that matches +criteria+ and
# is dominated by +l7Mark+.
#
# A row qualifies when its comma-separated L7Marks are exactly [l7Mark], or
# when all of its marks are in +allowedL7Marks+ and the ratio of
# $gtvs.get_flow_count(DstIp, DstPort, l7Mark) to the row's Flows is
# strictly greater than +flowsPerc+.
#
# l7Mark::         mark the aggregate would be labelled with.
# criteria::       SQL fragment interpolated verbatim into the WHERE clause.
# allowedL7Marks:: Array of marks tolerated next to +l7Mark+.
# flowsPerc::      ratio threshold compared against count / flows.
#
# Each qualifying row is also reported on $stderr. Returns whatever
# $db.query returns.
# NOTE(review): IPAddr.new_i is not defined in this file; presumably it is
# provided by the gtvs library - confirm.
def do_heuristic1(l7Mark, criteria, allowedL7Marks, flowsPerc)
    query = <<SQL
SELECT DstIp, DstPort, Flows, L7Marks FROM #{$gtvs.flowsAggByDstPortIp}
WHERE GtVerified = 0 AND #{criteria}
SQL
    $db.query(query) do |result|
        result.each do |row|
            ip, port, total, marks = row
            total = total.to_i
            marks = marks.split(',')
            unless marks.eql?([l7Mark])
                # Mixed marks: every mark must be tolerated and the share of
                # flows carrying l7Mark must exceed the threshold.
                next unless check_l7Marks(marks, allowedL7Marks)
                matching = $gtvs.get_flow_count(ip, port, l7Mark)
                ratio = matching.to_f / total.to_f
                next unless ratio > flowsPerc
            end
            $stderr.puts "All #{total} flows to #{IPAddr.new_i(ip.to_i)}:#{port} would be marked as #{l7Mark}"
            yield "#{ip}_#{port}"
        end
    end
end

# Yields (aggregate id, mark) for every row of $gtvs.flowsAggByDstPortIp
# with GtVerified = 0 that matches +criteria+ and whose DstIp appears in
# $gtvs.hostNames with a HostName ending in +domain+.
#
# l7Mark::   either a single mark applied to every row, or a Hash mapping an
#            integer destination port to a mark; rows whose port has no
#            entry (nil mark) are skipped.
# criteria:: SQL fragment interpolated verbatim into the WHERE clause.
# domain::   host name suffix, interpolated verbatim into the LIKE pattern.
#
# The aggregate id is "<DstIp>_<DstPort>". Each yielded row is also reported
# on $stderr. Returns whatever $db.query returns.
def do_heuristic2(l7Mark, criteria, domain)
    query = <<SQL
SELECT DstIp, DstPort, Flows FROM #{$gtvs.flowsAggByDstPortIp}
WHERE GtVerified = 0 AND #{criteria} AND DstIp IN
(SELECT Ip FROM #{$gtvs.hostNames} WHERE HostName LIKE '%#{domain}')
SQL
    $db.query(query) do |result|
        result.each do |row|
            ip, port, total = row
            mark = l7Mark.is_a?(Hash) ? l7Mark[port.to_i] : l7Mark
            next if mark.nil?
            $stderr.puts "All #{total} flows to #{IPAddr.new_i(ip.to_i)}:#{port} would be marked as #{mark}"
            yield("#{ip}_#{port}", mark)
        end
    end
end


# Yields (aggregate id, gtProto) for every distinct DstIp of unverified
# flows in $gtvs.flows on +dstPort+ whose SrcIp is itself the DstIp of a
# verified +gtProto+ flow on the same port.
#
# gtProto:: protocol name, interpolated verbatim into the SQL and passed
#           back to the block.
# dstPort:: destination port, interpolated verbatim into the SQL.
#
# The aggregate id is "<DstIp>_<dstPort>". Each yielded row is also reported
# on $stderr. Returns whatever $db.query returns.
def do_heuristic3(gtProto, dstPort)
    query = <<SQL
SELECT DISTINCT DstIp FROM #{$gtvs.flows}
WHERE GtState='unverified' AND DstPort=#{dstPort} AND SrcIp IN 
(SELECT DISTINCT DstIp AS SrcIp FROM #{$gtvs.flows}
WHERE GtState='verified' AND GtProto='#{gtProto}' AND DstPort=#{dstPort})
SQL
    $db.query(query) do |result|
        result.each do |row|
            ip = row[0]
            $stderr.puts "All flows to #{IPAddr.new_i(ip.to_i)}:#{dstPort} would be marked as #{gtProto}"
            yield("#{ip}_#{dstPort}", gtProto)
        end
    end
end

# Yields the aggregate id "<DstIp>_<DstPort>" of every port-443 row of
# $gtvs.flowsAggByDstPortIp that has GtVerified = 0 and an L7Marks value
# containing "ssl", provided the same DstIp also has a port-80 row whose
# GtProtos is 'http' and whose GtVerified equals its Flows.
#
# Each yielded row is reported on $stderr as a candidate for the 'https'
# protocol. Returns whatever $db.query returns.
def do_heuristic_https
    proto = 'https'
    query = <<SQL
SELECT t1.DstIp, t2.DstPort, t2.Flows
FROM #{$gtvs.flowsAggByDstPortIp} AS t1
LEFT JOIN
#{$gtvs.flowsAggByDstPortIp} AS t2
ON t1.DstIp = t2.DstIp
WHERE t1.DstPort = 80 AND t2.DstPort = 443
AND t1.GtProtos = 'http' AND t1.GtVerified = t1.Flows
AND t2.L7Marks LIKE '%ssl%' AND t2.GtVerified = 0;
SQL
    $db.query(query) do |result|
        result.each do |row|
            # The third selected column (t2.Flows) is not used here.
            ip, port = row
            $stderr.puts "All flows to #{IPAddr.new_i(ip.to_i)}:#{port} would be marked as #{proto}"
            yield "#{ip}_#{port}"
        end
    end
end

# Yields the aggregate id "<DstIp>_<DstPort>" of every (DstIp, DstPort)
# group of unverified flows in $gtvs.flows whose SrcIp is +srcIp+.
#
# srcIp::   source address, passed to INET_ATON inside the SQL.
# gtProto:: protocol name; only used in the $stderr report.
# dstPort:: nil (no filter), an Array (IN list) or a single value (equality).
# srcPort:: nil (no filter), an Array (IN list) or a single value (equality).
# l7Mark::  when not nil, restricts the flows to that L7Mark.
#
# All filter values are interpolated verbatim into the SQL. Returns whatever
# $db.query returns.
def do_heuristic_client(srcIp, gtProto, dstPort = nil, srcPort = nil, l7Mark = nil)
    # Builds the optional condition for one port column.
    port_filter = lambda do |column, port|
        case port
        when nil then ''
        when Array then " AND #{column} IN (#{port.join(',')})"
        else " AND #{column}=#{port} "
        end
    end

    conditions = port_filter.call('DstPort', dstPort) + port_filter.call('SrcPort', srcPort)
    conditions += " AND L7Mark='#{l7Mark}' " unless l7Mark.nil?

    query = <<SQL
SELECT DstIp, DstPort, COUNT(*) AS Flows FROM #{$gtvs.flows}
WHERE GtState='unverified'#{conditions}
AND SrcIp = INET_ATON('#{srcIp}')
GROUP BY DstIp, DstPort
SQL
    $db.query(query) do |result|
        result.each do |row|
            ip, port, total = row
            $stderr.puts "All #{total} flows to #{IPAddr.new_i(ip.to_i)}:#{port} would be marked as #{gtProto}"
            yield "#{ip}_#{port}"
        end
    end
end

# Yields one aggregate id "<DstIp>_<port>" per port for every row of
# $gtvs.flowsAggByDstIp that is not fully verified (GtVerified != Flows) and
# whose DstPorts column equals +dstPorts+ exactly.
#
# dstPorts:: comma-separated port list, interpolated verbatim into the SQL.
# gtProto::  protocol name; only used in the $stderr report.
#
# The ports are taken by splitting the row's DstPorts value on commas, and
# the row's Flows value is repeated in the report for each port.
# Returns whatever $db.query returns.
def do_heuristic_dstports(dstPorts, gtProto)
    query = <<SQL
SELECT DstIp, DstPorts, Flows FROM #{$gtvs.flowsAggByDstIp}
WHERE GtVerified != Flows
AND DstPorts = '#{dstPorts}'
SQL
    $db.query(query) do |result|
        result.each do |row|
            ip, port_list, total = row
            port_list.split(',').each do |port|
                $stderr.puts "All #{total} flows to #{IPAddr.new_i(ip.to_i)}:#{port} would be marked as #{gtProto}"
                yield "#{ip}_#{port}"
            end
        end
    end
end

# Run the user-supplied heuristics script in this process, where it can call
# the do_heuristic* helpers and use the $db / $gtvs globals set up above.
# The ensure clause closes the database connection even when the script
# raises or calls exit, instead of only on a clean run.
begin
    load(hscript)
ensure
    $db.close
end

