/*
 * Decompiled with CFR 0.152.
 */
package tsg.corpora;

import java.io.File;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.LinkedList;
import java.util.Scanner;
import settings.Parameters;
import tsg.TSNode;
import tsg.corpora.ConstCorpus;
import tsg.corpora.Wsj;
import util.DepConstConverter;
import util.FileUtil;
import util.Utility;

/**
 * Utilities for the PARC 700 data: aligns the word list and the dependency
 * list with the flat sentences and the constituency trees, writes head-annotated
 * trees, and builds a training corpus that includes the PARC gold trees.
 *
 * <p>All readers work on fixed file names below {@code PARC_DIR} and expect
 * {@value #PARC_SIZE} sentences.
 */
public class Parc extends ConstCorpus {
    private static final long serialVersionUID = 0L;
    public static String PARC_GOLD = String.valueOf(Parameters.corpusPath) + "PARC/parc700_headGold";
    public static String PARC_GOLD_NOTOP = String.valueOf(Parameters.corpusPath) + "PARC/parc700_headGold_noTop";
    public static String PARC_GOLD_CLEANED_NOTOP = String.valueOf(Parameters.corpusPath) + "PARC/parc700_headGold_cleaned_noTop";

    /** Number of sentences read and processed by the routines in this class. */
    private static final int PARC_SIZE = 700;

    /** Directory holding the raw PARC input files and receiving the generated output. */
    private static final String PARC_DIR = "/home/fsangati/CORPUS/PARC/";

    /** Character set used to read every PARC input file. */
    private static final String CORPUS_ENCODING = "ISO-8859-1";

    /**
     * Pairs {@code {word-list form, flat-sentence token}} that {@link #variation}
     * accepts as the same word even though neither string contains the other.
     */
    private static final String[][] CONTRACTION_VARIANTS = {
        {"can", "ca"},
        {"not", "n't"},
        {"is", "'s"},
        {"has", "'s"},
        {"have", "'ve"},
        {"will", "wo"},
        {"will", "'ll"},
        {"are", "'re"},
    };

    /**
     * Reads the flat sentences, the trees, the word list and the dependency list,
     * assigns heads to every tree from its dependency table, and writes two files:
     * a readable report (flat sentence followed by the converter's report) and the
     * head-annotated trees. The number of sentences for which the converter
     * returned {@code false} is printed to standard output at the end.
     */
    public static void newParc() {
        File outputFileDependency = new File(PARC_DIR + "parc700_readableNew");
        File outputFileHeadTrees = new File(PARC_DIR + "parc700_headTreesNew");
        PrintWriter dependencyWriter = FileUtil.getPrintWriter(outputFileDependency);
        PrintWriter writerTrees = FileUtil.getPrintWriter(outputFileHeadTrees);
        int wrongAssignment = 0;
        try {
            String[] flatSentences = new String[PARC_SIZE];
            TSNode[] trees = new TSNode[PARC_SIZE];
            int[][][] wordBoundary = new int[PARC_SIZE][][];
            int[][] indexConversion = new int[PARC_SIZE][];
            int[][][] dependency = new int[PARC_SIZE][][];
            Parc.readFlatSentences(flatSentences);
            Parc.readTrees(trees);
            // The word list must be read before the dependencies: readDependency
            // translates word-list positions through indexConversion.
            Parc.readWordBoundary(wordBoundary, flatSentences, indexConversion);
            Parc.readDependency(dependency, indexConversion);
            for (int i = 0; i < PARC_SIZE; ++i) {
                dependencyWriter.println(flatSentences[i]);
                TSNode tree = trees[i];
                // One-element array used as an out-parameter for the converter's report.
                String[] report = new String[]{""};
                boolean right = DepConstConverter.assignHeadsFromDependencyTable(tree, wordBoundary[i], dependency[i], report);
                dependencyWriter.print(report[0]);
                dependencyWriter.println();
                writerTrees.println(tree.toString(true, false));
                if (!right) {
                    ++wrongAssignment;
                }
            }
        } finally {
            // Close (and thereby flush) both outputs even when a sentence fails midway.
            dependencyWriter.close();
            writerTrees.close();
        }
        System.out.println("Wrong assignments: " + wrongAssignment);
    }

    /**
     * Fills the first {@value #PARC_SIZE} entries of {@code flatSentences} with the
     * lines of the flat-sentence file, one sentence per line.
     *
     * @param flatSentences destination array; must have room for {@value #PARC_SIZE} entries
     * @throws java.util.NoSuchElementException if the file has fewer lines than expected
     */
    public static void readFlatSentences(String[] flatSentences) {
        File parcFileFlat = new File(PARC_DIR + "sec23_selection_flat");
        Scanner scanFlat = null;
        try {
            scanFlat = new Scanner(parcFileFlat, CORPUS_ENCODING);
            for (int i = 0; i < PARC_SIZE; ++i) {
                flatSentences[i] = scanFlat.nextLine();
            }
        } catch (IOException e) {
            FileUtil.handleExceptions(e);
        } finally {
            if (scanFlat != null) {
                scanFlat.close();
            }
        }
    }

    /**
     * Fills the first {@value #PARC_SIZE} entries of {@code trees} with trees built
     * from the lines of the compressed tree file, one tree per line.
     *
     * @param trees destination array; must have room for {@value #PARC_SIZE} entries
     * @throws java.util.NoSuchElementException if the file has fewer lines than expected
     */
    public static void readTrees(TSNode[] trees) {
        File parcFileTree = new File(PARC_DIR + "sec23_selection_compressed");
        Scanner scanTree = null;
        try {
            scanTree = new Scanner(parcFileTree, CORPUS_ENCODING);
            for (int i = 0; i < PARC_SIZE; ++i) {
                trees[i] = new TSNode(scanTree.nextLine());
            }
        } catch (IOException e) {
            FileUtil.handleExceptions(e);
        } finally {
            if (scanTree != null) {
                scanTree.close();
            }
        }
    }

    /**
     * Aligns every entry of the PARC word list with the tokens of the matching flat
     * sentence.
     *
     * <p>From each line the parser takes the 1-based sentence number found between
     * the first {@code '('} and the first comma, and the word found between two
     * characters after the second comma and two characters before the first
     * {@code '['}. For every sentence it produces:
     * <ul>
     *   <li>{@code wordBoundary[s]}: one {@code int[]} per block, listing the
     *       flat-sentence token positions covered by that block;</li>
     *   <li>{@code indexConversion[s]}: for every word-list entry, in file order,
     *       the number of the block it belongs to.</li>
     * </ul>
     * Sentences that do not occur in the word list keep whatever value the arrays
     * already held.
     *
     * @param wordBoundary    output, indexed by zero-based sentence number
     * @param flatSentences   space-separated flat sentences, indexed like the output
     * @param indexConversion output, indexed by zero-based sentence number
     */
    public static void readWordBoundary(int[][][] wordBoundary, String[] flatSentences, int[][] indexConversion) {
        File parcFileWords = new File(PARC_DIR + "parc700_WordList");
        LinkedList<int[]> wordBlocks = new LinkedList<int[]>();
        LinkedList<Integer> sentenceIndexConversion = new LinkedList<Integer>();
        Scanner scanWordList = null;
        try {
            int currentLine = 0;   // zero-based sentence currently being collected
            int currentIndex = 0;  // position of the next unmatched token in that sentence
            int blockNumber = 0;   // number of the block the next word-list entry maps to
            String[] choices = flatSentences[0].split(" ");
            scanWordList = new Scanner(parcFileWords, CORPUS_ENCODING);
            while (scanWordList.hasNextLine()) {
                String wordListLine = scanWordList.nextLine().trim();
                int openParenthesis = wordListLine.indexOf('(');
                int firstComma = wordListLine.indexOf(',');
                int secondComma = wordListLine.indexOf(',', firstComma + 1);
                int wordEnd = wordListLine.indexOf('[') - 2;
                int lineIndex = Integer.parseInt(wordListLine.substring(openParenthesis + 1, firstComma)) - 1;
                String word = wordListLine.substring(secondComma + 2, wordEnd).toLowerCase();
                // Strip the surrounding single quotes of a quoted word.
                if (word.charAt(0) == '\'' && word.charAt(word.length() - 1) == '\'') {
                    word = word.substring(1, word.length() - 1);
                }
                // A backslash-escaped apostrophe starts a new piece (" '") of the word.
                word = word.replaceAll("\\\\\\'", " '").trim();
                String[] split = word.split("\\s+");
                int length = split.length;
                int[] indexesInBlock = new int[length];
                indexesInBlock[0] = -1; // -1 marks "no block to emit for this entry"

                if (lineIndex != currentLine) {
                    // First word of a new sentence: store the finished one and start over.
                    storeWordBlocks(currentLine, wordBlocks, sentenceIndexConversion, wordBoundary, indexConversion);
                    wordBlocks.clear();
                    sentenceIndexConversion.clear();
                    currentLine = lineIndex;
                    choices = flatSentences[currentLine].split(" ");
                    currentIndex = 0;
                    blockNumber = 0;
                }

                // Skip tokens until one contains the word, is contained in it,
                // or is a known contracted variant of it.
                String realWord = normalizeFlatToken(choices[currentIndex]);
                boolean isVariation = Parc.variation(word, realWord);
                while (word.indexOf(realWord) == -1 && realWord.indexOf(word) == -1 && !isVariation) {
                    realWord = normalizeFlatToken(choices[++currentIndex]);
                    isVariation = Parc.variation(word, realWord);
                }

                if (length == 1) {
                    if (word.equals(realWord) || isVariation) {
                        // One entry, one token.
                        sentenceIndexConversion.add(blockNumber);
                        indexesInBlock[0] = currentIndex++;
                        ++blockNumber;
                    } else {
                        int index = realWord.indexOf(word);
                        sentenceIndexConversion.add(blockNumber);
                        if (index == 0) {
                            // The word starts the token: emit the block but stay on the
                            // token and on the block number (presumably the rest of the
                            // token is covered by the following entries).
                            indexesInBlock[0] = currentIndex;
                        } else if (index + word.length() == realWord.length()) {
                            // The word ends the token: move on without emitting a block.
                            ++currentIndex;
                            ++blockNumber;
                        }
                    }
                } else {
                    // A multi-piece entry covers as many consecutive tokens as it has pieces.
                    for (int i = 0; i < length; ++i) {
                        indexesInBlock[i] = currentIndex + i;
                    }
                    sentenceIndexConversion.add(blockNumber);
                    currentIndex += length;
                    ++blockNumber;
                }
                if (indexesInBlock[0] != -1) {
                    wordBlocks.add(indexesInBlock);
                }
            }
            // Store the last sentence once the whole file has been consumed. Every
            // processed entry adds to sentenceIndexConversion, so an empty list means
            // that the file contained no entry at all.
            if (!sentenceIndexConversion.isEmpty()) {
                storeWordBlocks(currentLine, wordBlocks, sentenceIndexConversion, wordBoundary, indexConversion);
            }
        } catch (IOException e) {
            FileUtil.handleExceptions(e);
        } finally {
            if (scanWordList != null) {
                scanWordList.close();
            }
        }
    }

    /** Copies the blocks and the entry-to-block table collected for one sentence into the output arrays. */
    private static void storeWordBlocks(int sentence, LinkedList<int[]> wordBlocks, LinkedList<Integer> sentenceIndexConversion, int[][][] wordBoundary, int[][] indexConversion) {
        wordBoundary[sentence] = wordBlocks.toArray(new int[0][]);
        indexConversion[sentence] = Utility.intArrayConversion(sentenceIndexConversion);
    }

    /** Lower-cases a flat-sentence token and turns every backslash-escaped slash into a plain slash. */
    private static String normalizeFlatToken(String token) {
        return token.toLowerCase().replaceAll("\\\\/", "/");
    }

    /**
     * Tells whether {@code realWord} is an accepted contracted spelling of
     * {@code word}: can/ca, not/n't, is/'s, has/'s, have/'ve, will/wo, will/'ll,
     * are/'re. Both arguments are compared as given (no case folding).
     *
     * @param word     the form taken from the word list
     * @param realWord the token taken from the flat sentence
     * @return {@code true} if the pair is one of the accepted variants
     */
    public static boolean variation(String word, String realWord) {
        for (int i = 0; i < CONTRACTION_VARIANTS.length; ++i) {
            String[] pair = CONTRACTION_VARIANTS[i];
            if (word.equals(pair[0]) && realWord.equals(pair[1])) {
                return true;
            }
        }
        return false;
    }

    /**
     * Reads the PARC dependency list and stores, per sentence, the dependencies
     * that relate two words.
     *
     * <p>A line is used only when the character two positions after the first
     * comma and the character two positions after the third comma are both
     * {@code 'w'}; every other line is skipped. The 1-based sentence number is read
     * between the first {@code '('} and the first comma. The two word numbers are
     * read from fixed offsets (four characters after the first and third comma, up
     * to one character before the second comma and three characters before the end
     * of the line). Each word number is translated through
     * {@code indexConversion} before it is stored.
     *
     * @param dependency      output, indexed by zero-based sentence number; each
     *                        entry is an array of two-element pairs
     * @param indexConversion per sentence, maps a word-list position to a block
     *                        number, as filled by {@link #readWordBoundary}
     */
    public static void readDependency(int[][][] dependency, int[][] indexConversion) {
        File parcFileDependency = new File(PARC_DIR + "parc700_DepList");
        Scanner scanDependency = null;
        try {
            scanDependency = new Scanner(parcFileDependency, CORPUS_ENCODING);
            LinkedList<int[]> dependecyInSentece = new LinkedList<int[]>();
            int currentLine = 0;
            while (scanDependency.hasNextLine()) {
                String dependencyLine = scanDependency.nextLine().trim();
                int openParenthesis = dependencyLine.indexOf('(');
                int firstComma = dependencyLine.indexOf(',');
                int secondComma = dependencyLine.indexOf(',', firstComma + 1);
                int thirdComma = dependencyLine.indexOf(',', secondComma + 1);
                boolean betweenWords = dependencyLine.charAt(firstComma + 2) == 'w'
                        && dependencyLine.charAt(thirdComma + 2) == 'w';
                if (!betweenWords) {
                    continue;
                }
                int lineIndex = Integer.parseInt(dependencyLine.substring(openParenthesis + 1, firstComma)) - 1;
                int firstWordIndex = Integer.parseInt(dependencyLine.substring(firstComma + 4, secondComma - 1));
                int secondWordIndex = Integer.parseInt(dependencyLine.substring(thirdComma + 4, dependencyLine.length() - 3));
                if (lineIndex != currentLine) {
                    // First dependency of a new sentence: store the finished one.
                    dependency[currentLine] = dependecyInSentece.toArray(new int[0][]);
                    dependecyInSentece.clear();
                    currentLine = lineIndex;
                }
                int[] dep = new int[]{indexConversion[currentLine][firstWordIndex], indexConversion[currentLine][secondWordIndex]};
                dependecyInSentece.add(dep);
            }
            // Store the last sentence after the whole file has been read, so that its
            // final pair is converted like all others and trailing skipped lines
            // cannot prevent it from being stored.
            if (!dependecyInSentece.isEmpty()) {
                dependency[currentLine] = dependecyInSentece.toArray(new int[0][]);
            }
        } catch (IOException e) {
            FileUtil.handleExceptions(e);
        } finally {
            if (scanDependency != null) {
                scanDependency.close();
            }
        }
    }

    /**
     * Command-line entry point; builds the combined training corpus.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        Parc.training40PlusParc();
    }

    /**
     * Builds a training corpus from the {@code wsj-02-21.mrg} file (after
     * {@code removeTreesLongerThan(40, ...)}) plus all PARC gold trees, removes
     * quotations, and writes the result next to the PARC files as a
     * {@code .binary} and a {@code .complete} file.
     */
    public static void training40PlusParc() {
        File trainingFile = new File(String.valueOf(Parameters.corpusPath) + "COLLINS_97/wsj-02-21.mrg");
        ConstCorpus trainingCorpus = new ConstCorpus(trainingFile, "collins97_02-21");
        trainingCorpus.removeTreesLongerThan(40, Wsj.nonCountCatInLength);
        File parcGoldFile = new File(PARC_DIR + "parc700_headGold");
        ConstCorpus parc = new ConstCorpus(parcGoldFile, "parc");
        trainingCorpus.treeBank.addAll(parc.treeBank);
        Wsj.removeQuotations(trainingCorpus);
        String outputFile = PARC_DIR + "02_21_upto40_parc";
        trainingCorpus.toBinaryFile(new File(outputFile + ".binary"));
        trainingCorpus.toFile_Complete(new File(outputFile + ".complete"), false);
    }
}

