/*
 * Decompiled with CFR 0.152.
 */
package tsg.kernels;

import java.io.File;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.BitSet;
import java.util.Hashtable;
import java.util.ListIterator;
import java.util.Map;
import kernels.NodeSetCollector;
import kernels.NodeSetCollectorMUB;
import settings.Parameters;
import tsg.TSNodeLabel;
import tsg.TSNodeLabelIndex;
import tsg.TSNodeLabelStructure;
import tsg.corpora.Wsj;
import tsg.kernels.CommonSubtrees;
import util.FileUtil;
import util.PrintProgress;

/*
 * This class specifies class file version 49.0 but uses Java 6 signatures.  Assumed Java 6.
 */
public class CommonSubtreesMUBFreq
extends CommonSubtrees {
    public static int learningCurvePoints = 50;
    Hashtable<TSNodeLabel, int[]> fragmentBank;
    int[] learningCurveDepth = new int[50];

    public CommonSubtreesMUBFreq(ArrayList<TSNodeLabelStructure> treebank) {
        super(treebank);
        this.fragmentBank = new Hashtable();
        NodeSetCollectorMUB collector = new NodeSetCollectorMUB();
        int t1Index = -1;
        PrintProgress.start("Extracting fragment from sentence :");
        for (TSNodeLabelStructure t1 : treebank) {
            PrintProgress.next();
            ListIterator<TSNodeLabelStructure> i2 = treebank.listIterator(++t1Index + 1);
            while (i2.hasNext()) {
                TSNodeLabelStructure t2 = i2.next();
                NodeSetCollector[][] CST = CommonSubtreesMUBFreq.getCST(t1, t2, (NodeSetCollector)collector);
                this.extractSubTrees(CST, t1, false);
            }
        }
        PrintProgress.end();
    }

    public CommonSubtreesMUBFreq(ArrayList<TSNodeLabelStructure> treebank, File traceLearningCurveFile) {
        super(treebank);
        int treebankSize = treebank.size();
        int learningCurveEvery = treebankSize * (treebankSize + 1) / 2 / learningCurvePoints;
        System.out.println("Printing learning curve every: " + learningCurveEvery);
        PrintWriter pw = FileUtil.getPrintWriter(traceLearningCurveFile);
        this.fragmentBank = new Hashtable();
        NodeSetCollectorMUB collector = new NodeSetCollectorMUB();
        int t1Index = -1;
        int learningCounter = 0;
        PrintProgress.start("Extracting fragment from sentence :");
        for (TSNodeLabelStructure t1 : treebank) {
            PrintProgress.next();
            ListIterator<TSNodeLabelStructure> i2 = treebank.listIterator(++t1Index + 1);
            while (i2.hasNext()) {
                TSNodeLabelStructure t2 = i2.next();
                NodeSetCollector[][] CST = CommonSubtreesMUBFreq.getCST(t1, t2, (NodeSetCollector)collector);
                this.extractSubTrees(CST, t1, true);
                if (++learningCounter % learningCurveEvery != 0) continue;
                this.storeLearningData(pw, learningCounter);
            }
        }
        this.storeLearningData(pw, learningCounter);
        PrintProgress.end();
        pw.close();
    }

    private void storeLearningData(PrintWriter pw, int learningCounter) {
        pw.print(String.valueOf(learningCounter) + "\t");
        int[] nArray = this.learningCurveDepth;
        int n = this.learningCurveDepth.length;
        int n2 = 0;
        while (n2 < n) {
            int i = nArray[n2];
            pw.print(String.valueOf(i) + "\t");
            ++n2;
        }
        pw.println();
        this.learningCurveDepth = new int[50];
    }

    private void extractSubTrees(NodeSetCollector[][] CPG, TSNodeLabelStructure s, boolean traceLearningCurve) {
        int nodeIndex = -1;
        NodeSetCollectorMUB finalNodeSet = new NodeSetCollectorMUB();
        NodeSetCollector[][] nodeSetCollectorArray = CPG;
        int n = CPG.length;
        int n2 = 0;
        while (n2 < n) {
            NodeSetCollector[] wordCollectors = nodeSetCollectorArray[n2];
            ++nodeIndex;
            NodeSetCollector[] nodeSetCollectorArray2 = wordCollectors;
            int n3 = wordCollectors.length;
            int n4 = 0;
            while (n4 < n3) {
                NodeSetCollector coll = nodeSetCollectorArray2[n4];
                finalNodeSet.addAll(coll);
                ++n4;
            }
            ++n2;
        }
        for (BitSet bs : finalNodeSet.bitSetSet) {
            TSNodeLabelIndex rootNode;
            TSNodeLabel fragment;
            if (bs.cardinality() == 1 || !this.add(fragment = (rootNode = s.structure[bs.nextSetBit(0)]).getSubTree(bs)) || !traceLearningCurve) continue;
            int n5 = fragment.maxDepth();
            this.learningCurveDepth[n5] = this.learningCurveDepth[n5] + 1;
        }
    }

    private boolean add(TSNodeLabel fragment) {
        int[] freq = this.fragmentBank.get(fragment);
        if (freq == null) {
            this.fragmentBank.put(fragment, new int[]{1});
            return true;
        }
        freq[0] = freq[0] + 1;
        return false;
    }

    public String toString() {
        StringBuilder sb = new StringBuilder("");
        for (Map.Entry<TSNodeLabel, int[]> entry : this.fragmentBank.entrySet()) {
            sb.append(entry.getKey().toString(false, true)).append("\t").append(entry.getValue()[0]);
        }
        return sb.toString();
    }

    public void printFragmentsToFile(File outpuFile) {
        PrintWriter pw = FileUtil.getPrintWriter(outpuFile);
        for (Map.Entry<TSNodeLabel, int[]> entry : this.fragmentBank.entrySet()) {
            pw.println(String.valueOf(entry.getKey().toString(false, true)) + "\t" + entry.getValue()[0]);
        }
        pw.close();
    }

    public String reportFragmentDepth() {
        int[] depthCountTypes = new int[50];
        long[] depthCountTokens = new long[50];
        for (Map.Entry<TSNodeLabel, int[]> entry : this.fragmentBank.entrySet()) {
            int depth;
            int n = depth = entry.getKey().maxDepth();
            depthCountTypes[n] = depthCountTypes[n] + 1;
            int n2 = depth;
            depthCountTokens[n2] = depthCountTokens[n2] + (long)entry.getValue()[0];
        }
        StringBuilder sb = new StringBuilder("");
        int d = 0;
        while (d < 50) {
            int typesD = depthCountTypes[d];
            long tokensD = depthCountTokens[d];
            if (typesD != 0) {
                sb.append("Depth " + d + ":\t" + typesD + "\t" + tokensD + "\n");
            }
            ++d;
        }
        return sb.toString();
    }

    public static void main1(String[] args) throws Exception {
        File inputFile = new File("tmp/FewParseTrees.txt");
        ArrayList<TSNodeLabelStructure> treebank = TSNodeLabelStructure.readTreebank(inputFile, FileUtil.defaultEncoding, 1000);
        TSNodeLabelStructure.removeSemanticTagInTreebank(treebank);
        for (TSNodeLabelStructure t : treebank) {
            System.out.println(t.structure()[0].toString(false, true));
        }
    }

    public static void main(String[] args) throws Exception {
        File inputFile = new File(String.valueOf(Wsj.WsjOriginalCleaned) + "wsj-02-21.mrg");
        String outputFolder = String.valueOf(Parameters.resultsPath) + "TSG/TSGkernels/";
        new File(outputFolder).mkdirs();
        File learningCurveFile = new File(String.valueOf(outputFolder) + "learningFragments_NoSemTag_MUB_freq_all.txt");
        ArrayList<TSNodeLabelStructure> treebank = TSNodeLabelStructure.readTreebank(inputFile, FileUtil.defaultEncoding, 10000);
        TSNodeLabelStructure.removeSemanticTagInTreebank(treebank);
        System.out.println("Treebank size: " + treebank.size());
        CommonSubtreesMUBFreq cs = new CommonSubtreesMUBFreq(treebank, learningCurveFile);
        String fragmentDepthReport = cs.reportFragmentDepth();
        FileUtil.append(fragmentDepthReport, new File(String.valueOf(outputFolder) + "fragmentDepthReport_NoSemTag_MUB_freq_all.txt"));
        System.out.println(fragmentDepthReport);
        cs.printFragmentsToFile(new File(String.valueOf(outputFolder) + "fragments_NoSemTag_MUB_freq_all.txt"));
    }
}

