/*
 * Decompiled with CFR 0.152.
 */
package edu.stanford.nlp.international.spanish.pipeline;

import edu.stanford.nlp.international.spanish.pipeline.MultiWordPreprocessor;
import edu.stanford.nlp.international.spanish.pipeline.MultiWordTreeExpander;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.stats.Counters;
import edu.stanford.nlp.stats.TwoDimensionalCounter;
import edu.stanford.nlp.trees.LabeledScoredTreeFactory;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.trees.TreeFactory;
import edu.stanford.nlp.trees.TreeNormalizer;
import edu.stanford.nlp.trees.TreeReader;
import edu.stanford.nlp.trees.TreebankTagUpdater;
import edu.stanford.nlp.trees.Trees;
import edu.stanford.nlp.trees.international.spanish.SpanishSplitTreeNormalizer;
import edu.stanford.nlp.trees.international.spanish.SpanishTreeNormalizer;
import edu.stanford.nlp.trees.international.spanish.SpanishTreebankLanguagePack;
import edu.stanford.nlp.trees.international.spanish.SpanishXMLTreeReaderFactory;
import edu.stanford.nlp.trees.tregex.TregexMatcher;
import edu.stanford.nlp.trees.tregex.TregexPattern;
import edu.stanford.nlp.util.CollectionUtils;
import edu.stanford.nlp.util.Factory;
import edu.stanford.nlp.util.Pair;
import edu.stanford.nlp.util.PropertiesUtils;
import edu.stanford.nlp.util.StringUtils;
import edu.stanford.nlp.util.concurrent.MulticoreWrapper;
import edu.stanford.nlp.util.concurrent.ThreadsafeProcessor;
import edu.stanford.nlp.util.logging.Redwood;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.ObjectInputStream;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.concurrent.ExecutionException;
import java.util.function.Predicate;

public class AnCoraProcessor {
    /** Logging channel for this class. */
    private static final Redwood.RedwoodChannels log = Redwood.channels(AnCoraProcessor.class);

    /** Corpus files handed to the constructor; each one is read by {@code loadTrees}. */
    private final List<File> inputFiles;

    /** Parsed command-line / configuration options. */
    private final Properties options;

    /**
     * Word-to-tag counts. Either deserialized from the {@code unigramTagger} option or
     * started empty; in both cases counts from the loaded corpus files are added to it.
     */
    private final TwoDimensionalCounter<String, String> unigramTagger;

    /** Value of the {@code expandElisions} option (defaults to false). */
    private final boolean expandElisions;

    /** Value of the {@code expandConmigo} option (defaults to false). */
    private final boolean expandConmigo;

    /** Normalizer applied to both halves of a tree after it has been split. */
    private static final TreeNormalizer splittingNormalizer = new SpanishSplitTreeNormalizer();

    /** Tree factory used by {@link #splittingNormalizer} when normalizing split trees. */
    private static final TreeFactory splittingTreeFactory = new LabeledScoredTreeFactory();

    /**
     * Tregex pattern locating a split point: an {@code fp} node that is the immediate left
     * sister of a node whose label does not start with {@code f}, and that is a direct child
     * of an {@code S} or {@code sentence} node.
     */
    private static final TregexPattern pSplitPoint = TregexPattern.compile("fp $+ /^[^f]/ > S|sentence");

    /**
     * {@code "<AnCora tag>,<word>"} pairs that {@link #convertTreeTagsToUD(Tree)} maps to the
     * UD tag {@code AUX} instead of {@code VERB}.
     * NOTE(review): public and mutable; kept that way so existing external users still compile.
     */
    public static HashSet<String> auxTagConversion = new HashSet<String>(Arrays.asList(
            "vsip000,es", "vaip000,ha", "vaip000,han", "vsis000,fue", "vsn0000,ser",
            "vsip000,son", "vmip000,est\u00e1", "vaii000,hab\u00eda", "vsp0000,sido", "vmip000,puede",
            "vaip000,hay", "vsii000,era", "vsif000,ser\u00e1", "van0000,haber", "vmip000,est\u00e1n",
            "vsip000,Es", "vsis000,fueron", "vssp000,sea", "vmip000,debe", "vmic000,podr\u00eda",
            "vsic000,ser\u00eda", "vmii000,estaba", "vasp000,haya", "vaii000,hab\u00edan", "vaip000,hemos",
            "vaip000,he", "vsii000,eran", "vsg0000,siendo", "vmn0000,poder", "vmip000,deben"));

    /**
     * Word forms checked before {@link #auxTagConversion} is consulted; a verb whose word is
     * not in this set is always tagged {@code VERB}.
     * NOTE(review): public and mutable; kept that way so existing external users still compile.
     */
    public static HashSet<String> potentialAUXWords = new HashSet<String>(Arrays.asList(
            "es", "ha", "han", "fue", "ser", "son", "est\u00e1", "hab\u00eda", "sido", "puede",
            "hay", "era", "ser\u00e1", "haber", "est\u00e1n", "Es", "fueron", "sea", "debe", "pueden",
            "podr\u00eda", "ser\u00eda", "estaba", "haya", "hab\u00edan", "hemos", "he", "eran", "siendo",
            "poder", "deben"));

    /** Usage text logged by {@code main}. */
    private static final String usage = String.format("Usage: java %s [OPTIONS] file(s)%n%n", AnCoraProcessor.class.getName())
            + "Options:\n"
            + "    -unigramTagger <tagger_path>: Path to a serialized `TwoDimensionalCounter` which\n"
            + "        should be used for unigram tagging in multi-word token expansion. If this option\n"
            + "        is not provided, a unigram tagger will be built from the provided corpus data.\n"
            + "        (This option is useful if you are processing splits of the corpus separately but\n"
            + "        want each step to benefit from a complete tagger.)\n"
            + "    -ner: Add NER-specific information to trees\n"
            + "    -generateTags: build tags with this model\n"
            + "    -expandElisions: MWT expand words like del, al\n"
            + "    -expandConmigo: MWT expand words like conmigo, contigo\n"
            + "    -convertToUD: Convert part-of-speech tags to UD\n";

    /** Option name to argument count, passed to {@code StringUtils.argsToProperties} in {@code main}. */
    private static final Map<String, Integer> argOptionDefs = new HashMap<String, Integer>();

    public AnCoraProcessor(List<File> inputFiles, Properties options) throws IOException, ClassNotFoundException {
        this.inputFiles = inputFiles;
        this.options = options;
        if (options.containsKey("unigramTagger")) {
            ObjectInputStream ois = new ObjectInputStream(new FileInputStream(options.getProperty("unigramTagger")));
            this.unigramTagger = (TwoDimensionalCounter)ois.readObject();
        } else {
            this.unigramTagger = new TwoDimensionalCounter();
        }
        this.expandElisions = PropertiesUtils.getBool(options, "expandElisions", false);
        this.expandConmigo = PropertiesUtils.getBool(options, "expandConmigo", false);
    }

    /**
     * Runs the full pipeline: loads the trees from all input files, then applies the
     * multi-word token fix-up to them.
     *
     * @return the processed trees
     */
    public List<Tree> process() throws InterruptedException, IOException, ExecutionException {
        List<Tree> loaded = this.loadTrees();
        return this.fixMultiWordTokens(loaded);
    }

    /**
     * Reads every input file, collecting its trees and merging the per-file tag counts into
     * the shared unigram tagger.
     *
     * @return all trees from all input files, in input-file order
     * @throws IOException if a file could not be read ({@code processTreeFile} signals this by
     *         returning {@code null})
     */
    private List<Tree> loadTrees() throws InterruptedException, IOException, ExecutionException {
        boolean ner = PropertiesUtils.getBool(this.options, "ner", false);
        String encoding = new SpanishTreebankLanguagePack().getEncoding();
        SpanishXMLTreeReaderFactory trf = new SpanishXMLTreeReaderFactory(true, true, ner, false, this.expandElisions, this.expandConmigo);
        List<Tree> trees = new ArrayList<Tree>();
        for (File file : this.inputFiles) {
            Pair<TwoDimensionalCounter<String, String>, List<Tree>> ret = AnCoraProcessor.processTreeFile(file, trf, encoding);
            if (ret == null) {
                // Fail with a message naming the file instead of a bare NullPointerException.
                throw new IOException("Failed to read trees from " + file.getPath());
            }
            Counters.addInPlace(this.unigramTagger, ret.first());
            trees.addAll(ret.second());
        }
        return trees;
    }

    /**
     * Reads all trees from one corpus file, splitting each tree at every split point found by
     * {@link #findSplitPoint(Tree)}, and counts word/tag pairs of the resulting trees.
     *
     * @param file corpus file to read
     * @param trf factory producing the tree reader for the file
     * @param encoding character encoding used to decode the file
     * @return the per-file tag counts paired with the trees read, or {@code null} if an
     *         {@link IOException} occurred (its stack trace is printed)
     */
    private static Pair<TwoDimensionalCounter<String, String>, List<Tree>> processTreeFile(File file, SpanishXMLTreeReaderFactory trf, String encoding) {
        TwoDimensionalCounter<String, String> tagger = new TwoDimensionalCounter<String, String>();
        // try-with-resources: the stream and readers are closed even when reading fails midway.
        try (FileInputStream fis = new FileInputStream(file);
             BufferedReader in = new BufferedReader(new InputStreamReader((InputStream)fis, encoding));
             TreeReader tr = trf.newTreeReader(file.getPath(), in)) {
            List<Tree> trees = new ArrayList<Tree>();
            Tree t;
            while ((t = tr.readTree()) != null) {
                Tree splitPoint;
                do {
                    // Peel off the part left of the split point; keep splitting the remainder.
                    // When no split point is left, the whole tree is added and t becomes null.
                    splitPoint = AnCoraProcessor.findSplitPoint(t);
                    Pair<Tree, Tree> split = AnCoraProcessor.split(t, splitPoint);
                    Tree toAdd = split.first();
                    t = split.second();
                    trees.add(toAdd);
                    AnCoraProcessor.updateTagger(tagger, toAdd);
                } while (splitPoint != null);
            }
            return new Pair<TwoDimensionalCounter<String, String>, List<Tree>>(tagger, trees);
        }
        catch (IOException e) {
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Adds one count per (word, tag) pair of the tree's tagged yield to {@code tagger},
     * ignoring tokens whose tag is exactly {@code "MW?"}.
     *
     * @param tagger counter to update in place
     * @param t tree whose tagged yield is counted
     */
    private static void updateTagger(TwoDimensionalCounter<String, String> tagger, Tree t) {
        for (CoreLabel token : t.taggedLabeledYield()) {
            boolean skip = token.tag().equals("MW?");
            if (!skip) {
                tagger.incrementCount(token.word(), token.tag());
            }
        }
    }

    /**
     * Splits a tree in two at the given split point.
     *
     * <p>The left result is {@code t} pruned with a {@code LeftOfFilter} and the right result is
     * {@code t} pruned with a {@code RightOfExclusiveFilter}; both are then normalized with the
     * splitting normalizer.
     *
     * @param t tree to split
     * @param splitPoint node at which to split, or {@code null} for no split
     * @return the (left, right) pair; when {@code splitPoint} is {@code null} the pair is
     *         ({@code t}, {@code null}) and {@code t} is returned untouched
     */
    static Pair<Tree, Tree> split(Tree t, Tree splitPoint) {
        if (splitPoint == null) {
            // Second type argument must be Tree to match the declared return type.
            return new Pair<Tree, Tree>(t, null);
        }
        Tree left = t.prune(new LeftOfFilter(splitPoint, t));
        Tree right = t.prune(new RightOfExclusiveFilter(splitPoint, t));
        left = splittingNormalizer.normalizeWholeTree(left, splittingTreeFactory);
        right = splittingNormalizer.normalizeWholeTree(right, splittingTreeFactory);
        return new Pair<Tree, Tree>(left, right);
    }

    /**
     * Looks for the first node of {@code t} matched by the split-point pattern.
     *
     * @param t tree to search
     * @return the first matching node, or {@code null} if the pattern does not match
     */
    static Tree findSplitPoint(Tree t) {
        TregexMatcher matcher = pSplitPoint.matcher(t);
        return matcher.find() ? matcher.getMatch() : null;
    }

    /**
     * Runs the multi-word token processing over all trees, using one worker per available
     * processor.
     *
     * <p>The trees are partitioned into {@code availableProcessors * 20} chunks which are fed to
     * a {@link MulticoreWrapper}; finished chunks are drained as they become available.
     * NOTE(review): the wrapper is constructed with {@code false} as its third argument,
     * presumably "do not order results", so the output order may differ from the input order;
     * confirm against MulticoreWrapper.
     *
     * @param trees trees to process
     * @return the processed trees
     */
    private List<Tree> fixMultiWordTokens(List<Tree> trees) throws InterruptedException, ExecutionException {
        boolean ner = PropertiesUtils.getBool(this.options, "ner", false);
        Factory<TreeNormalizer> tnf = new Factory<TreeNormalizer>(){

            @Override
            public TreeNormalizer create() {
                return new SpanishTreeNormalizer(true, false, false);
            }
        };
        TreeFactory tf = new LabeledScoredTreeFactory();
        MultiWordProcessor processor = new MultiWordProcessor(tnf, tf, ner);
        int availableProcessors = Runtime.getRuntime().availableProcessors();
        MulticoreWrapper<Collection<Tree>, Collection<Tree>> wrapper = new MulticoreWrapper<Collection<Tree>, Collection<Tree>>(availableProcessors, processor, false);
        int numChunks = availableProcessors * 20;
        List<Tree> ret = new ArrayList<Tree>();
        // Iterate with a parameterized element type (no raw Collection) so put() is type-checked.
        for (Collection<Tree> chunk : CollectionUtils.partitionIntoFolds(trees, numChunks)) {
            wrapper.put(chunk);
            // Collect whatever has finished so far without waiting for the rest.
            while (wrapper.peek()) {
                ret.addAll(wrapper.poll());
            }
        }
        // Wait for the outstanding chunks, then collect the remaining results.
        wrapper.join();
        while (wrapper.peek()) {
            ret.addAll(wrapper.poll());
        }
        return ret;
    }

    /**
     * Rewrites, in place, the part-of-speech labels of all preterminals below {@code tree}
     * from AnCora tags to UD tags.
     *
     * <p>Only the descendants of {@code tree} are visited; {@code tree} itself is never
     * relabeled. Preterminals whose tag/word combination has no mapping are left unchanged and
     * reported on {@code System.err} as {@code <tag>\t<word>}.
     *
     * @param tree root of the subtree whose preterminals are converted
     */
    public static void convertTreeTagsToUD(Tree tree) {
        for (Tree t : tree.getChildrenAsList()) {
            if (!t.isPreTerminal()) {
                AnCoraProcessor.convertTreeTagsToUD(t);
                continue;
            }
            String udTag = AnCoraProcessor.udTagFor(t);
            if (udTag == null) {
                System.err.println(t.label().value() + "\t" + t.getChild(0).label().value());
                continue;
            }
            t.setLabel(CoreLabel.wordFromString(udTag));
        }
    }

    /** Returns the UD tag for a preterminal, or {@code null} when no mapping applies. */
    private static String udTagFor(Tree preterminal) {
        String ancoraTag = preterminal.label().value();
        if (ancoraTag.startsWith("a")) {
            return "ADJ";
        }
        if (ancoraTag.startsWith("d")) {
            return "DET";
        }
        if (ancoraTag.startsWith("f")) {
            return AnCoraProcessor.udTagForFTag(AnCoraProcessor.leafWord(preterminal));
        }
        if (ancoraTag.equals("i")) {
            return "INTJ";
        }
        if (ancoraTag.startsWith("n")) {
            boolean properNoun = ancoraTag.equals("np00000")
                    && AnCoraProcessor.startsWithAsciiUppercase(AnCoraProcessor.leafWord(preterminal));
            return properNoun ? "PROPN" : "NOUN";
        }
        if (ancoraTag.startsWith("p")) {
            return "PRON";
        }
        if (ancoraTag.startsWith("r")) {
            return "ADV";
        }
        if (ancoraTag.startsWith("s")) {
            return "ADP";
        }
        if (ancoraTag.startsWith("v")) {
            String word = AnCoraProcessor.leafWord(preterminal);
            boolean auxiliary = potentialAUXWords.contains(word)
                    && auxTagConversion.contains(ancoraTag + "," + word);
            return auxiliary ? "AUX" : "VERB";
        }
        if (ancoraTag.startsWith("z")) {
            return AnCoraProcessor.udTagForZTag(AnCoraProcessor.leafWord(preterminal));
        }
        if (ancoraTag.equals("cc")) {
            return "CCONJ";
        }
        if (ancoraTag.equals("cs")) {
            return "SCONJ";
        }
        if (ancoraTag.equals("w")) {
            return AnCoraProcessor.udTagForWTag(AnCoraProcessor.leafWord(preterminal));
        }
        return null;
    }

    /** Returns the word stored on the single leaf child of a preterminal. */
    private static String leafWord(Tree preterminal) {
        return preterminal.getChild(0).label().value();
    }

    /** True when the word is non-empty and its first char is in {@code A-Z}; empty words yield false. */
    private static boolean startsWithAsciiUppercase(String word) {
        if (word.isEmpty()) {
            return false;
        }
        char first = word.charAt(0);
        return first >= 'A' && first <= 'Z';
    }

    /** UD tag for a word under an {@code f*} tag: digit-free is PUNCT, all digits is NUM, else {@code null}. */
    private static String udTagForFTag(String word) {
        if (word.matches("[^0-9]+")) {
            return "PUNCT";
        }
        if (word.matches("[0-9]+")) {
            return "NUM";
        }
        return null;
    }

    /** UD tag for a word under a {@code z*} tag, chosen by the word's shape; {@code null} if no shape matches. */
    private static String udTagForZTag(String word) {
        if (word.matches("[A-Z][A-Z0-9]+") || word.matches("[A-Z0-9]+[A-Z]")) {
            return "PROPN";
        }
        if (word.matches("[^0-9]+")) {
            return "NOUN";
        }
        if (word.matches("[0-9\\.\\,\u00ba:]+")
                || word.matches("m\\.[0-9]+(\\:)?")
                || word.matches("[0-9]+cc")) {
            return "NUM";
        }
        return null;
    }

    /** UD tag for a word under the {@code w} tag, chosen by the word's shape; {@code null} if no shape matches. */
    private static String udTagForWTag(String word) {
        if (word.matches("[^0-9]+") || word.matches("[0-9]{4}|[0-9]+\\'")) {
            return "NOUN";
        }
        if (word.matches("[0-9\\.\\,]+")) {
            return "NUM";
        }
        if (word.matches("m\\.[0-9]+")) {
            return "NOUN";
        }
        return null;
    }

    /**
     * Command-line entry point: processes the given corpus files and prints one tree per line
     * on standard output.
     *
     * <p>Optionally converts tags to UD ({@code -convertToUD}) and/or re-tags the trees with a
     * part-of-speech model ({@code -generateTags}, model path taken from
     * {@code generateTagsModel}). When no arguments or no file arguments are supplied, the
     * usage text is logged and the method returns without processing.
     *
     * @param args options followed by corpus file paths
     */
    public static void main(String[] args) throws InterruptedException, IOException, ExecutionException, ClassNotFoundException {
        if (args.length < 1) {
            log.info(usage);
            return;
        }
        Properties options = StringUtils.argsToProperties(args, argOptionDefs);
        // Non-flag arguments are read from the empty-string key; it is absent when no files were given.
        String fileArgs = options.getProperty("");
        if (fileArgs == null || fileArgs.trim().isEmpty()) {
            log.info(usage);
            return;
        }
        String[] remainingArgs = fileArgs.split(" ");
        List<File> fileList = new ArrayList<File>();
        for (String arg : remainingArgs) {
            fileList.add(new File(arg));
        }
        AnCoraProcessor processor = new AnCoraProcessor(fileList, options);
        List<Tree> trees = processor.process();
        boolean convertToUD = PropertiesUtils.getBool(options, "convertToUD");
        if (convertToUD) {
            for (Tree t : trees) {
                AnCoraProcessor.convertTreeTagsToUD(t);
            }
        }
        boolean generateTags = PropertiesUtils.getBool(options, "generateTags");
        String partOfSpeechModel = options.getProperty("generateTagsModel", "edu/stanford/nlp/models/pos-tagger/spanish-ud.tagger");
        // Compare by content: reference comparison against "" was true for any runtime string.
        if (generateTags && !partOfSpeechModel.isEmpty()) {
            TreebankTagUpdater spanishTagger = new TreebankTagUpdater(partOfSpeechModel);
            for (Tree t : trees) {
                spanishTagger.tagTree(t);
            }
        }
        for (Tree t : trees) {
            System.out.println(t);
        }
    }

    // Number of arguments each command-line option consumes.
    static {
        argOptionDefs.put("unigramTagger", 1);
        argOptionDefs.put("ner", 0);
        argOptionDefs.put("convertToUD", 0);
        argOptionDefs.put("generateTags", 0);
        // The usage text documents these as argument-less flags like -ner. Registering them
        // with zero arguments keeps them from consuming the file path that follows them.
        argOptionDefs.put("expandElisions", 0);
        argOptionDefs.put("expandConmigo", 0);
    }

    /**
     * Worker that applies the multi-word token pipeline to a batch of trees: preprocessing
     * with the enclosing processor's unigram tagger, phrase expansion, then whole-tree
     * normalization.
     */
    private class MultiWordProcessor
    implements ThreadsafeProcessor<Collection<Tree>, Collection<Tree>> {
        /** Normalizer created from {@link #tnf} for this worker instance. */
        private final SpanishTreeNormalizer tn;
        /** Factory kept so that {@link #newInstance()} can create a fresh normalizer. */
        private final Factory<TreeNormalizer> tnf;
        private final TreeFactory tf;
        private final boolean ner;

        public MultiWordProcessor(Factory<TreeNormalizer> tnf, TreeFactory tf, boolean ner) {
            this.ner = ner;
            this.tf = tf;
            this.tnf = tnf;
            this.tn = (SpanishTreeNormalizer)tnf.create();
        }

        /**
         * Processes every tree of the batch, in iteration order.
         *
         * @param coll trees to process
         * @return the processed trees
         */
        @Override
        public Collection<Tree> process(Collection<Tree> coll) {
            MultiWordTreeExpander expander = new MultiWordTreeExpander();
            ArrayList<Tree> processed = new ArrayList<Tree>();
            for (Tree tree : coll) {
                MultiWordPreprocessor.traverseAndFix(tree, null, AnCoraProcessor.this.unigramTagger, this.ner);
                Tree expanded = expander.expandPhrases(tree, this.tn, this.tf);
                Tree normalized = this.tn.normalizeWholeTree(expanded, this.tf, AnCoraProcessor.this.expandElisions, AnCoraProcessor.this.expandConmigo);
                processed.add(normalized);
            }
            return processed;
        }

        /** Creates another worker sharing the factories and NER setting but with its own normalizer. */
        @Override
        public ThreadsafeProcessor<Collection<Tree>, Collection<Tree>> newInstance() {
            return new MultiWordProcessor(this.tnf, this.tf, this.ner);
        }
    }

    /**
     * Predicate handed to {@code Tree.prune} in {@code split} to build the right half of a
     * split tree. It accepts nodes that dominate the first leaf following the reference node,
     * and nodes whose leftmost leaf ends to the right of where that leaf starts.
     */
    private static class RightOfExclusiveFilter
    implements Predicate<Tree>,
    Serializable {
        private static final long serialVersionUID = 8283161954004080591L;
        private Tree root;
        /** Leftmost leaf of the subtree that follows the reference node; null if nothing follows. */
        private Tree firstToKeep;

        private RightOfExclusiveFilter(Tree reference, Tree root) {
            this.root = root;
            this.firstToKeep = this.getFollowingTerminal(reference, root);
        }

        @Override
        public boolean test(Tree obj) {
            // Short-circuit keeps the edge comparison from running for dominating nodes.
            return obj.dominates(this.firstToKeep)
                    || Trees.rightEdge(this.getLeftmostDescendant(obj), this.root) > Trees.leftEdge(this.firstToKeep, this.root);
        }

        /** Leftmost leaf of whatever follows {@code terminal} in {@code root}, or null. */
        private Tree getFollowingTerminal(Tree terminal, Tree root) {
            Tree next = this.getRightSiblingOrRightAncestor(terminal, root);
            return next == null ? null : this.getLeftmostDescendant(next);
        }

        /**
         * Walks up from {@code t} until a node with a right sibling is found and returns that
         * sibling; returns null when the walk reaches a node without a parent in {@code root}.
         */
        private Tree getRightSiblingOrRightAncestor(Tree t, Tree root) {
            Tree current = t;
            while (true) {
                Tree parent = current.parent(root);
                if (parent == null) {
                    return null;
                }
                int position = parent.objectIndexOf(current);
                if (position < parent.numChildren() - 1) {
                    return parent.getChild(position + 1);
                }
                current = parent;
            }
        }

        /** Follows first children down from {@code t} until a leaf is reached. */
        private Tree getLeftmostDescendant(Tree t) {
            Tree current = t;
            while (!current.isLeaf()) {
                current = current.children()[0];
            }
            return current;
        }
    }

    /**
     * Predicate handed to {@code Tree.prune} in {@code split} to build the left half of a
     * split tree. It accepts the reference node, its ancestors and descendants, and nodes
     * whose rightmost leaf ends at or before the left edge of the reference node.
     */
    private static class LeftOfFilter
    implements Predicate<Tree>,
    Serializable {
        private static final long serialVersionUID = -5146948439247427344L;
        private Tree reference;
        private Tree root;

        private LeftOfFilter(Tree reference, Tree root) {
            this.root = root;
            this.reference = reference;
        }

        @Override
        public boolean test(Tree obj) {
            boolean relatedToReference = obj == this.reference
                    || obj.dominates(this.reference)
                    || this.reference.dominates(obj);
            if (relatedToReference) {
                return true;
            }
            Tree lastLeaf = this.getRightmostDescendant(obj);
            int lastLeafEnd = Trees.rightEdge(lastLeaf, this.root);
            return lastLeafEnd <= Trees.leftEdge(this.reference, this.root);
        }

        /** Follows last children down from {@code t} until a leaf is reached. */
        private Tree getRightmostDescendant(Tree t) {
            Tree current = t;
            while (!current.isLeaf()) {
                current = current.children()[current.children().length - 1];
            }
            return current;
        }
    }
}

