/*
 * Decompiled with CFR 0.152.
 */
package tsg.kernels;

import java.io.File;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.BitSet;
import java.util.Hashtable;
import java.util.ListIterator;
import java.util.Map;
import java.util.TreeMap;
import kernels.NodeSetCollector;
import kernels.NodeSetCollectorMUB;
import kernels.NodeSetCollectorSimple;
import kernels.NodeSetCollectorUnion;
import settings.Parameters;
import tsg.TSNodeLabel;
import tsg.TSNodeLabelIndex;
import tsg.TSNodeLabelStructure;
import tsg.corpora.TUT09;
import tsg.corpora.Wsj;
import tsg.kernels.CommonSubtrees;
import util.FileUtil;
import util.PrintProgress;

/*
 * This class specifies class file version 49.0 but uses Java 6 signatures.  Assumed Java 6.
 */
/**
 * Builds a frequency table of tree fragments shared between pairs of trees in a treebank.
 *
 * <p>Every tree is compared with each tree that follows it in the treebank. The node sets
 * produced by {@code getCST} are reduced through a {@link NodeSetCollectorMUB}, node sets of
 * cardinality one are discarded, and each remaining node set is turned into a fragment of the
 * first tree and counted in {@link #fragmentBank}.
 *
 * <p>Optionally, per-tree statistics (bucketed by fragment depth) are traced to three files
 * while the extraction runs.
 */
public class CommonSubtreesMUBFreqNew extends CommonSubtrees {
    public static int learningCurvePoints = 50;
    /**
     * Number of depth columns written per row of the learning-curve trace files. Fragments
     * handled by the traced extraction must have a depth strictly below this value.
     */
    public static int maxDepth = 50;
    /** Fragment -> single-element array holding the number of times the fragment was added. */
    Hashtable<TSNodeLabel, int[]> fragmentBank = new Hashtable<TSNodeLabel, int[]>();
    PrintWriter pw_C0;
    PrintWriter pw_C1;
    PrintWriter pw_Tot;

    /**
     * @param treebank the trees to compare pairwise; handed to the superclass constructor
     */
    public CommonSubtreesMUBFreqNew(ArrayList<TSNodeLabelStructure> treebank) {
        super(treebank);
    }

    /**
     * Compares every tree with all trees that follow it in the treebank and counts the
     * resulting fragments in {@link #fragmentBank}.
     */
    public void extractFromTreebank() {
        // Declared as the base type so the same getCST overload is selected as before.
        NodeSetCollector collector = new NodeSetCollectorUnion();
        int firstPartnerIndex = 1;
        PrintProgress.start("Extracting fragment from sentence :");
        for (TSNodeLabelStructure t1 : this.treebank) {
            PrintProgress.next();
            NodeSetCollectorSimple intermediateCollector =
                    this.collectAgainstLaterTrees(t1, firstPartnerIndex, collector);
            this.extractSubTreesInTable(intermediateCollector, t1);
            ++firstPartnerIndex;
        }
        PrintProgress.end();
    }

    /**
     * Same extraction as {@link #extractFromTreebank()}, additionally writing one row per tree
     * to each of the three trace files.
     *
     * <p>The writers are closed even when the extraction fails part-way, so partially written
     * trace files are not left open.
     *
     * @param traceLearningCurveFileC0 receives, per depth, the share of fragments seen for the
     *        first time
     * @param traceLearningCurveFileC1 receives, per depth, the share of fragments seen for the
     *        second time
     * @param traceLearningCurveFileTot receives, per depth, the number of fragments extracted
     * @throws ArrayIndexOutOfBoundsException if a fragment has depth {@code >= maxDepth}
     */
    public void extractFromTreebankAndLearningCurve(File traceLearningCurveFileC0, File traceLearningCurveFileC1, File traceLearningCurveFileTot) {
        try {
            this.pw_C0 = FileUtil.getPrintWriter(traceLearningCurveFileC0);
            this.pw_C1 = FileUtil.getPrintWriter(traceLearningCurveFileC1);
            this.pw_Tot = FileUtil.getPrintWriter(traceLearningCurveFileTot);
            // Declared as the base type so the same getCST overload is selected as before.
            NodeSetCollector collectorType = new NodeSetCollectorUnion();
            // Serves both as the one-based tree number written to the trace files and as the
            // zero-based index of the first tree that follows the current one.
            int t1IndexOneBased = 1;
            PrintProgress.start("Extracting fragment from sentence :");
            for (TSNodeLabelStructure t1 : this.treebank) {
                PrintProgress.next();
                NodeSetCollectorSimple intermediateCollector =
                        this.collectAgainstLaterTrees(t1, t1IndexOneBased, collectorType);
                this.extractSubTreesInTableAndTraceCurve(intermediateCollector, t1, t1IndexOneBased);
                ++t1IndexOneBased;
            }
            PrintProgress.end();
        } finally {
            CommonSubtreesMUBFreqNew.closeIfOpened(this.pw_C0);
            CommonSubtreesMUBFreqNew.closeIfOpened(this.pw_C1);
            CommonSubtreesMUBFreqNew.closeIfOpened(this.pw_Tot);
        }
    }

    /** Closes the writer unless it is {@code null} (i.e. it was never successfully opened). */
    private static void closeIfOpened(PrintWriter writer) {
        if (writer != null) {
            writer.close();
        }
    }

    /**
     * Compares {@code t1} with every tree from position {@code firstPartnerIndex} to the end of
     * the treebank and gathers the retained node sets of all comparisons in one collector.
     */
    private NodeSetCollectorSimple collectAgainstLaterTrees(TSNodeLabelStructure t1, int firstPartnerIndex, NodeSetCollector collector) {
        NodeSetCollectorSimple intermediateCollector = new NodeSetCollectorSimple();
        ListIterator<TSNodeLabelStructure> partners = this.treebank.listIterator(firstPartnerIndex);
        while (partners.hasNext()) {
            TSNodeLabelStructure t2 = partners.next();
            NodeSetCollector[][] CST = CommonSubtreesMUBFreqNew.getCST(t1, t2, collector);
            this.extractSubTreesIntermediate(CST, intermediateCollector);
        }
        return intermediateCollector;
    }

    /**
     * Feeds the single node set of every table cell (when it has one) into a
     * {@link NodeSetCollectorMUB}, then copies the surviving node sets that contain more than
     * one node into {@code intermediateCollector}.
     *
     * @param CPG table of collectors; every cell is expected to be a {@link NodeSetCollectorUnion}
     * @param intermediateCollector receives the retained node sets
     */
    private void extractSubTreesIntermediate(NodeSetCollector[][] CPG, NodeSetCollector intermediateCollector) {
        NodeSetCollectorMUB finalNodeSet = new NodeSetCollectorMUB();
        for (NodeSetCollector[] row : CPG) {
            for (NodeSetCollector cell : row) {
                BitSet singleBS = ((NodeSetCollectorUnion)cell).singleBS();
                if (singleBS != null) {
                    finalNodeSet.add(singleBS);
                }
            }
        }
        for (BitSet bs : finalNodeSet.bitSetSet) {
            // Node sets with exactly one node are not kept.
            if (bs.cardinality() != 1) {
                intermediateCollector.add(bs);
            }
        }
    }

    /**
     * Adds every collected fragment of {@code s} to the fragment bank and writes one
     * tab-separated row to each trace writer: the tree number followed by one value per depth
     * in {@code [0, maxDepth)}.
     *
     * @param intermediateCollector node sets of {@code s} to turn into fragments
     * @param s the tree the node sets refer to
     * @param treeCounter number written in the first column of each row
     * @throws ArrayIndexOutOfBoundsException if a fragment has depth {@code >= maxDepth}
     */
    private void extractSubTreesInTableAndTraceCurve(NodeSetCollectorSimple intermediateCollector, TSNodeLabelStructure s, int treeCounter) {
        // Read the static once so the arrays and the output loop always agree on the size.
        int depthBuckets = maxDepth;
        int[] firstSeenByDepth = new int[depthBuckets];
        int[] secondSeenByDepth = new int[depthBuckets];
        int[] totalByDepth = new int[depthBuckets];
        for (BitSet bs : intermediateCollector.bitSetSet) {
            TSNodeLabelIndex rootNode = s.structure[bs.nextSetBit(0)];
            TSNodeLabel fragment = rootNode.getSubTree(bs);
            int freq = this.add(fragment);
            int depth = fragment.maxDepth();
            totalByDepth[depth]++;
            if (freq == 1) {
                // The fragment was not in the bank before this occurrence.
                firstSeenByDepth[depth]++;
            } else if (freq == 2) {
                // The fragment had been seen exactly once before this occurrence.
                secondSeenByDepth[depth]++;
            }
        }
        this.pw_C0.print(treeCounter + "\t");
        this.pw_C1.print(treeCounter + "\t");
        this.pw_Tot.print(treeCounter + "\t");
        for (int depth = 0; depth < depthBuckets; ++depth) {
            int total = totalByDepth[depth];
            this.pw_C0.print(CommonSubtreesMUBFreqNew.ratio(firstSeenByDepth[depth], total) + "\t");
            this.pw_C1.print(CommonSubtreesMUBFreqNew.ratio(secondSeenByDepth[depth], total) + "\t");
            this.pw_Tot.print(total + "\t");
        }
        this.pw_C0.println();
        this.pw_C1.println();
        this.pw_Tot.println();
        // Flush after every row so the trace can be followed while the extraction is running.
        this.pw_C0.flush();
        this.pw_C1.flush();
        this.pw_Tot.flush();
    }

    /**
     * Returns {@code a / b} as a float.
     *
     * @return {@code 0} whenever {@code a} is zero (which also covers {@code 0 / 0}); otherwise
     *         the floating-point quotient, which is infinite when {@code b} is zero
     */
    public static float ratio(int a, int b) {
        if (a == 0) {
            return 0.0f;
        }
        return (float)a / (float)b;
    }

    /** Adds the fragment of {@code s} described by each collected node set to the fragment bank. */
    private void extractSubTreesInTable(NodeSetCollectorSimple intermediateCollector, TSNodeLabelStructure s) {
        for (BitSet bs : intermediateCollector.bitSetSet) {
            // The fragment is rooted at the node with the lowest index in the set.
            TSNodeLabelIndex rootNode = s.structure[bs.nextSetBit(0)];
            this.add(rootNode.getSubTree(bs));
        }
    }

    /**
     * Counts one more occurrence of {@code fragment}.
     *
     * @return the frequency of the fragment after this occurrence has been counted
     */
    private int add(TSNodeLabel fragment) {
        int[] freq = this.fragmentBank.get(fragment);
        if (freq == null) {
            this.fragmentBank.put(fragment, new int[]{1});
            return 1;
        }
        return ++freq[0];
    }

    /** Returns one line per fragment in the bank: the fragment, a tab, and its frequency. */
    @Override
    public String toString() {
        StringBuilder sb = new StringBuilder();
        for (Map.Entry<TSNodeLabel, int[]> entry : this.fragmentBank.entrySet()) {
            sb.append(entry.getKey().toString(false, true))
              .append("\t")
              .append(entry.getValue()[0])
              .append("\n");
        }
        return sb.toString();
    }

    /**
     * Writes one line per fragment in the bank (fragment, tab, frequency) to the given file.
     * The writer is closed even if writing fails.
     */
    public void printFragmentsToFile(File outpuFile) {
        PrintWriter pw = FileUtil.getPrintWriter(outpuFile);
        try {
            for (Map.Entry<TSNodeLabel, int[]> entry : this.fragmentBank.entrySet()) {
                pw.println(entry.getKey().toString(false, true) + "\t" + entry.getValue()[0]);
            }
        } finally {
            pw.close();
        }
    }

    /**
     * Summarises the fragment bank by fragment depth.
     *
     * <p>Depths are tallied in a sorted map instead of fixed 50-slot arrays, so fragments of any
     * depth can be reported; for depths below 50 the output is the same as before.
     *
     * @return a tab-separated table with a header line and, for each depth that occurs (in
     *         ascending order), the number of distinct fragments, the sum of their frequencies,
     *         and the number of distinct fragments with frequency greater than one
     */
    public String reportFragmentDepth() {
        // depth -> { #types, #tokens, #types with frequency > 1 }
        Map<Integer, long[]> countsByDepth = new TreeMap<Integer, long[]>();
        for (Map.Entry<TSNodeLabel, int[]> entry : this.fragmentBank.entrySet()) {
            Integer depth = Integer.valueOf(entry.getKey().maxDepth());
            int freq = entry.getValue()[0];
            long[] counts = countsByDepth.get(depth);
            if (counts == null) {
                counts = new long[3];
                countsByDepth.put(depth, counts);
            }
            counts[0]++;
            counts[1] += freq;
            if (freq > 1) {
                counts[2]++;
            }
        }
        StringBuilder sb = new StringBuilder("Depth\t#types\t#tokens\t#types>1\n");
        for (Map.Entry<Integer, long[]> row : countsByDepth.entrySet()) {
            long[] counts = row.getValue();
            sb.append(row.getKey().intValue())
              .append("\t").append(counts[0])
              .append("\t").append(counts[1])
              .append("\t").append(counts[2])
              .append("\n");
        }
        return sb.toString();
    }

    /**
     * Reads a small treebank and prints the fragment bank.
     *
     * <p>NOTE(review): no extraction method is invoked here, and nothing in this class fills
     * the bank at construction time, so the printed output looks empty as written - confirm
     * whether a call to {@link #extractFromTreebank()} is missing.
     */
    public static void mainDummy(String[] args) throws Exception {
        File inputFile = new File("tmp/FewParseTrees1.txt");
        ArrayList<TSNodeLabelStructure> treebank = TSNodeLabelStructure.readTreebank(inputFile, FileUtil.defaultEncoding, 10);
        CommonSubtreesMUBFreqNew cs = new CommonSubtreesMUBFreqNew(treebank);
        System.out.println(cs.toString());
    }

    /** Runs the traced extraction on the WSJ file, after removing semantic tags from the trees. */
    public static void mainWsj() throws Exception {
        System.out.println("Max depth: " + maxDepth);
        File inputFile = new File(Wsj.WsjOriginalCleanedTop + "wsj-02-21.mrg");
        String outputFolder = Parameters.resultsPath + "TSG/TSGkernels/Wsj/KenelFragments/SemTagOff_Top/all/";
        new File(outputFolder).mkdirs();
        ArrayList<TSNodeLabelStructure> treebank = TSNodeLabelStructure.readTreebank(inputFile, FileUtil.defaultEncoding, 20000);
        TSNodeLabelStructure.removeSemanticTagInTreebank(treebank);
        CommonSubtreesMUBFreqNew.extractAndReport(treebank, outputFolder);
    }

    /** Runs the traced extraction on the TUT09 training file. */
    public static void mainEvalita() throws Exception {
        System.out.println("Max depth: " + maxDepth);
        File inputFile = TUT09.trainFileNoTracesNoSemTags;
        String outputFolder = Parameters.resultsPath + "TSG/TSGkernels/TUT09/" + FileUtil.dataFolder() + "/";
        new File(outputFolder).mkdirs();
        ArrayList<TSNodeLabelStructure> treebank = TSNodeLabelStructure.readTreebank(inputFile, "UTF-8", 20000);
        CommonSubtreesMUBFreqNew.extractAndReport(treebank, outputFolder);
    }

    /**
     * Shared tail of the corpus drivers: runs the traced extraction on {@code treebank} and
     * writes the trace files, the depth report and the fragment list into {@code outputFolder}
     * (which must already exist and end with a path separator).
     */
    private static void extractAndReport(ArrayList<TSNodeLabelStructure> treebank, String outputFolder) throws Exception {
        System.out.println("Treebank size: " + treebank.size());
        File learningCurveFileC0 = new File(outputFolder + "learningFragments_C0_MUB_freq_all.txt");
        File learningCurveFileC1 = new File(outputFolder + "learningFragments_C1_MUB_freq_all.txt");
        File learningCurveFileTot = new File(outputFolder + "learningFragments_Tot_MUB_freq_all.txt");
        CommonSubtreesMUBFreqNew cs = new CommonSubtreesMUBFreqNew(treebank);
        cs.extractFromTreebankAndLearningCurve(learningCurveFileC0, learningCurveFileC1, learningCurveFileTot);
        String fragmentDepthReport = cs.reportFragmentDepth();
        FileUtil.append(fragmentDepthReport, new File(outputFolder + "fragmentDepthReport_MUB_freq_all.txt"));
        System.out.println(fragmentDepthReport);
        cs.printFragmentsToFile(new File(outputFolder + "fragments_MUB_freq_all.txt"));
    }

    /** Entry point; runs the WSJ experiment. */
    public static void main(String[] args) throws Exception {
        CommonSubtreesMUBFreqNew.mainWsj();
    }
}

