/*
 * Decompiled with CFR 0.152.
 */
package de.unihd.dbs.uima.annotator.treetagger;

import de.unihd.dbs.uima.annotator.heideltime.resources.Language;
import de.unihd.dbs.uima.annotator.heideltime.utilities.Logger;
import de.unihd.dbs.uima.annotator.treetagger.TreeTaggerProcess;
import de.unihd.dbs.uima.annotator.treetagger.TreeTaggerProperties;
import de.unihd.dbs.uima.annotator.treetagger.TreeTaggerReader;
import de.unihd.dbs.uima.annotator.treetagger.TreeTaggerTokenizer;
import de.unihd.dbs.uima.annotator.treetagger.TreeTaggerWriter;
import de.unihd.dbs.uima.types.heideltime.Sentence;
import de.unihd.dbs.uima.types.heideltime.Token;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.util.ArrayList;
import java.util.EnumSet;
import java.util.HashSet;
import java.util.List;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.cas.FSIterator;
import org.apache.uima.cas.text.AnnotationFS;
import org.apache.uima.cas.text.AnnotationIndex;
import org.apache.uima.impl.RootUimaContext_impl;
import org.apache.uima.jcas.JCas;
import org.apache.uima.resource.ConfigurationManager;
import org.apache.uima.resource.ResourceManager;
import org.apache.uima.resource.impl.ConfigurationManager_impl;
import org.apache.uima.resource.impl.ResourceManager_impl;

public class TreeTaggerWrapper
extends JCasAnnotator_ImplBase {
    /** Runtime class of this instance; passed to {@code Logger} calls to identify the reporting component. */
    private Class<?> component = ((Object)((Object)this)).getClass();
    // Names of the configuration parameters looked up on the UimaContext.
    public static final String PARAM_LANGUAGE = "language";
    public static final String PARAM_ANNOTATE_TOKENS = "annotate_tokens";
    public static final String PARAM_ANNOTATE_SENTENCES = "annotate_sentences";
    public static final String PARAM_ANNOTATE_PARTOFSPEECH = "annotate_partofspeech";
    // NOTE(review): this parameter name is declared but never read in initialize(UimaContext) as shown here.
    public static final String PARAM_IMPROVE_GERMAN_SENTENCES = "improvegermansentences";
    public static final String PARAM_CHINESE_TOKENIZER_PATH = "ChineseTokenizerPath";
    /** Document language; set in initialize(UimaContext) and used to pick tokenizer and sentence post-processing. */
    private Language language;
    /** When true, process() creates Token annotations. */
    private Boolean annotate_tokens = false;
    /** Handed to the TreeTaggerReader in doTreeTag(). */
    private Boolean annotate_sentences = false;
    /** When true, process() runs the TreeTagger over the indexed tokens. */
    private Boolean annotate_partofspeech = false;
    /** Installation paths, file names and language switches for the TreeTagger. */
    private TreeTaggerProperties ttprops = new TreeTaggerProperties();
    /** Tagger process wrapper; created lazily in doTreeTag() and released in quit(). */
    private TreeTaggerProcess ttProc = null;
    /** Writer task of the most recent doTreeTag() run (feeds token strings to the tagger's stdin). */
    private TreeTaggerWriter ttwriter;
    /** Reader task of the most recent doTreeTag() run (consumes the tagger's stdout). */
    private TreeTaggerReader ttreader;
    /**
     * Convenience overload for callers that do not need a custom Chinese tokenizer location.
     * Delegates to the seven-argument variant with a {@code null} tokenizer path.
     *
     * @param language               document language
     * @param treeTaggerHome         TreeTagger installation directory
     * @param annotateTokens         whether Token annotations should be produced
     * @param annotateSentences      whether Sentence annotations should be produced
     * @param annotatePartOfSpeech   whether part-of-speech tagging should run
     * @param improveGermanSentences forwarded unchanged to the seven-argument variant
     */
    public void initialize(Language language, String treeTaggerHome, Boolean annotateTokens, Boolean annotateSentences, Boolean annotatePartOfSpeech, Boolean improveGermanSentences) {
        final String noChineseTokenizerPath = null;
        initialize(language, treeTaggerHome, annotateTokens, annotateSentences, annotatePartOfSpeech, improveGermanSentences, noChineseTokenizerPath);
    }

    /**
     * Programmatic initialisation without a UIMA descriptor: records the TreeTagger home
     * directory, wraps the remaining settings in a {@code TreeTaggerContext} and runs the
     * regular context-based initialisation on it.
     *
     * @param language               document language
     * @param treeTaggerHome         TreeTagger installation directory
     * @param annotateTokens         whether Token annotations should be produced
     * @param annotateSentences      whether Sentence annotations should be produced
     * @param annotatePartOfSpeech   whether part-of-speech tagging should run
     * @param improveGermanSentences forwarded to the context constructor
     * @param cnTokPath              directory of the Chinese tokenizer script, may be {@code null}
     */
    public void initialize(Language language, String treeTaggerHome, Boolean annotateTokens, Boolean annotateSentences, Boolean annotatePartOfSpeech, Boolean improveGermanSentences, String cnTokPath) {
        // The home directory has to be in place before the context-based initialisation reads it.
        setHome(treeTaggerHome);
        initialize((UimaContext) new TreeTaggerContext(language, annotateTokens, annotateSentences, annotatePartOfSpeech, improveGermanSentences, cnTokPath));
    }

    /**
     * Reads the annotator configuration from {@code aContext}, fills in the TreeTagger
     * properties and verifies that the files needed for tokenization and tagging exist.
     *
     * <p>The TreeTagger root is taken from a previous {@link #setHome(String)} call or, if
     * none was made, from the {@code TREETAGGER_HOME} environment variable. If no root can
     * be determined, or a required file is missing, an error is logged and the JVM is
     * terminated with exit code -1.
     *
     * @param aContext context providing the configuration parameter values
     */
    public void initialize(UimaContext aContext) {
        this.language = Language.getLanguageFromString((String) aContext.getConfigParameterValue(PARAM_LANGUAGE));
        this.annotate_tokens = readFlag(aContext, PARAM_ANNOTATE_TOKENS);
        this.annotate_sentences = readFlag(aContext, PARAM_ANNOTATE_SENTENCES);
        this.annotate_partofspeech = readFlag(aContext, PARAM_ANNOTATE_PARTOFSPEECH);
        String cnTokPath = (String) aContext.getConfigParameterValue(PARAM_CHINESE_TOKENIZER_PATH);

        this.ttprops.languageName = this.language.getTreeTaggerLangName();
        if (this.ttprops.rootPath == null) {
            this.ttprops.rootPath = System.getenv("TREETAGGER_HOME");
        }
        // Bail out before the root path is used to build any file names; otherwise the
        // existence probes below would silently look at "null/lib/...".
        if (this.ttprops.rootPath == null) {
            Logger.printError("TreeTagger environment variable is not present, aborting.");
            System.exit(-1);
        }

        String libDir = this.ttprops.rootPath + this.ttprops.fileSeparator + "lib";
        String cmdDir = this.ttprops.rootPath + this.ttprops.fileSeparator + "cmd";

        this.ttprops.tokScriptName = "utf8-tokenize.perl";
        // Prefer the UTF-8 variants of the parameter and abbreviation files when they exist.
        String utf8ParFile = this.ttprops.languageName + "-utf8.par";
        this.ttprops.parFileName = new File(libDir, utf8ParFile).exists() ? utf8ParFile : this.ttprops.languageName + ".par";
        String utf8AbbFile = this.ttprops.languageName + "-abbreviations-utf8";
        this.ttprops.abbFileName = new File(libDir, utf8AbbFile).exists() ? utf8AbbFile : this.ttprops.languageName + "-abbreviations";
        this.ttprops.languageSwitch = this.language.getTreeTaggerSwitch();
        this.ttprops.chineseTokenizerPath = cnTokPath != null && !cnTokPath.equals("") ? new File(cnTokPath) : new File(this.ttprops.rootPath, "cmd");

        boolean abbFileFound = new File(libDir, this.ttprops.abbFileName).exists();
        if (!abbFileFound) {
            if (this.language.equals(Language.CHINESE) || this.language.equals(Language.RUSSIAN)) {
                // These languages are tokenized without an abbreviation list.
                abbFileFound = true;
                this.ttprops.abbFileName = null;
            } else {
                Logger.printError(this.component, "File missing to use TreeTagger tokenizer: " + this.ttprops.abbFileName);
            }
        }

        boolean parFileFound = new File(libDir, this.ttprops.parFileName).exists();
        if (!parFileFound) {
            Logger.printError(this.component, "File missing to use TreeTagger tokenizer: " + this.ttprops.parFileName);
        }

        boolean tokScriptFound = new File(cmdDir, this.ttprops.tokScriptName).exists();
        if (!tokScriptFound) {
            if (this.language.equals(Language.CHINESE)) {
                // Chinese uses its own tokenization process instead of the perl script.
                tokScriptFound = true;
            } else {
                Logger.printError(this.component, "File missing to use TreeTagger tokenizer: " + this.ttprops.tokScriptName);
            }
        }

        if (!(abbFileFound && parFileFound && tokScriptFound)) {
            Logger.printError(this.component, "Cannot find tree tagger (" + cmdDir + this.ttprops.fileSeparator + this.ttprops.tokScriptName + "). Make sure that path to tree tagger is set correctly in config.props!");
            Logger.printError(this.component, "If path is set correctly:");
            Logger.printError(this.component, "Maybe you need to download the TreeTagger tagger-scripts.tar.gz");
            Logger.printError(this.component, "from http://www.cis.uni-muenchen.de/~schmid/tools/TreeTagger/data/tagger-scripts.tar.gz");
            Logger.printError(this.component, "Extract this file and copy the missing file into the corresponding TreeTagger directories.");
            Logger.printError(this.component, "If missing, copy " + this.ttprops.abbFileName + " into " + libDir);
            Logger.printError(this.component, "If missing, copy " + this.ttprops.parFileName + " into " + libDir);
            Logger.printError(this.component, "If missing, copy " + this.ttprops.tokScriptName + " into " + cmdDir);
            System.exit(-1);
        }
    }

    /**
     * Reads a Boolean configuration parameter, treating an absent value as {@code false}
     * so that later unboxing of the flag cannot fail with a NullPointerException.
     */
    private static Boolean readFlag(UimaContext aContext, String parameterName) {
        Boolean value = (Boolean) aContext.getConfigParameterValue(parameterName);
        return value != null ? value : Boolean.FALSE;
    }

    /**
     * Runs the configured processing steps on one document: tokenization, part-of-speech
     * tagging, and the language-specific sentence post-processing for German and French.
     *
     * @param jcas the document to annotate
     * @throws AnalysisEngineProcessException declared by the annotator contract
     */
    public void process(JCas jcas) throws AnalysisEngineProcessException {
        if (annotate_tokens) {
            // Chinese text goes through its own external tokenizer.
            boolean isChinese = language.equals(Language.CHINESE);
            if (isChinese) {
                tokenizeChinese(jcas);
            } else {
                tokenize(jcas);
            }
        }

        if (annotate_partofspeech) {
            doTreeTag(jcas);
        }

        // Sentence boundary repair exists for exactly these two languages.
        if (language == Language.GERMAN) {
            improveGermanSentences(jcas);
        } else if (language == Language.FRENCH) {
            improveFrenchSentences(jcas);
        }
    }

    /**
     * Tokenizes the document text with the Java TreeTagger tokenizer and adds a Token
     * annotation for every token that can be located in the original text.
     *
     * <p>Blank lines are passed through the tokenizer as an {@code EMPTYLINE} marker. When
     * part-of-speech tagging is enabled, a zero-length marker token with POS
     * {@code EMPTYLINE} is indexed at the current offset.
     *
     * @param jcas the document to tokenize
     */
    private void tokenize(JCas jcas) {
        Logger.printDetail(this.component, "TreeTagger (tokenization) with: " + this.ttprops.abbFileName);
        EnumSet<TreeTaggerTokenizer.Flag> flags = TreeTaggerTokenizer.Flag.getSet(this.ttprops.languageSwitch);

        // Use the abbreviation file selected during initialisation. It was previously
        // overwritten with a hard-coded "english-abbreviations" here, which ignored the
        // document language and made the no-abbreviation-file case unreachable.
        String abbreviationPath = null;
        if (this.ttprops.abbFileName != null) {
            abbreviationPath = this.ttprops.rootPath + this.ttprops.fileSeparator + "lib" + this.ttprops.fileSeparator + this.ttprops.abbFileName;
        }
        TreeTaggerTokenizer ttt = new TreeTaggerTokenizer(abbreviationPath, flags);

        String documentText = jcas.getDocumentText();
        List<String> tokenized = ttt.tokenize(documentText.replace("\n\n", "\nEMPTYLINE\n"));

        // Tokens are matched left to right; the offset only ever moves forward.
        int tokenOffset = 0;
        for (String s : tokenized) {
            if (s.equals("EMPTYLINE")) {
                if (this.annotate_partofspeech.booleanValue()) {
                    Token marker = new Token(jcas);
                    marker.setBegin(tokenOffset);
                    marker.setEnd(tokenOffset);
                    marker.setPos("EMPTYLINE");
                    marker.addToIndexes();
                }
                continue;
            }

            int begin = documentText.indexOf(s, tokenOffset);
            if (begin < 0) {
                Logger.printError(this.component, "Tokenization was interrupted because the token \"" + s + "\" could not be found in the original text. The reason for this might be that the encoding of the document is not UTF-8. This token was skipped and if it was part of a temporal expression, will not be extracted.");
                continue;
            }

            Token newToken = new Token(jcas);
            newToken.setBegin(begin);
            newToken.setEnd(begin + s.length());
            newToken.addToIndexes();
            tokenOffset = newToken.getEnd();
        }
    }

    /**
     * Tokenizes Chinese text by piping it line by line through the external Chinese
     * tokenization process and adding a Token annotation for each returned token.
     *
     * <p>Any failure is printed and ends tokenization for this document. The streams and
     * the child process are released in every case, including failures.
     *
     * @param jcas the document to tokenize
     */
    private void tokenizeChinese(JCas jcas) {
        Process proc = null;
        try {
            proc = this.ttprops.getChineseTokenizationProcess();
            Logger.printDetail(this.component, "Chinese tokenization: " + this.ttprops.chineseTokenizerPath);
            String documentText = jcas.getDocumentText();
            int tokenOffset = 0;
            try (BufferedReader in = new BufferedReader(new InputStreamReader(proc.getInputStream(), "UTF-8"));
                 BufferedWriter out = new BufferedWriter(new OutputStreamWriter(proc.getOutputStream(), "UTF-8"))) {
                for (String inSplit : documentText.split("[\\r\\n]+")) {
                    // Send one input line and flush so the tokenizer can answer right away.
                    out.write(inSplit);
                    out.newLine();
                    out.flush();

                    String s = in.readLine();
                    while (s != null) {
                        for (String tok : s.split("\\s+")) {
                            int begin = documentText.indexOf(tok, tokenOffset);
                            if (begin < 0) {
                                throw new RuntimeException("Could not find token " + tok + " in JCas after tokenizing with Chinese tokenization script.");
                            }
                            Token newToken = new Token(jcas);
                            newToken.setBegin(begin);
                            newToken.setEnd(begin + tok.length());
                            newToken.addToIndexes();
                            tokenOffset = newToken.getEnd();
                        }
                        // No further output pending for this input line: move on to the next one.
                        if (!in.ready()) {
                            break;
                        }
                        s = in.readLine();
                    }
                }
            }
        }
        catch (Exception e) {
            e.printStackTrace();
        }
        finally {
            // Previously skipped whenever an exception occurred, leaking the child process.
            if (proc != null) {
                proc.destroy();
            }
        }
    }

    /**
     * Sends all indexed tokens to the TreeTagger process and lets a reader task consume the
     * tagger output. The tagger process is started on first use and reused afterwards.
     *
     * <p>Writer and reader run on separate threads; this method blocks until both have
     * finished. If the calling thread is interrupted while waiting, the interrupt flag is
     * restored so callers further up can observe it.
     *
     * @param jcas the document whose tokens are to be tagged
     */
    private void doTreeTag(JCas jcas) {
        try {
            if (this.ttProc == null) {
                this.ttProc = new TreeTaggerProcess(this.ttprops.getTreeTaggingProcess());
            }
            Logger.printDetail(this.component, "TreeTagger (pos tagging) with: " + this.ttprops.parFileName);

            // Snapshot the tokens and their surface strings in index order.
            ArrayList<String> tokenStrings = new ArrayList<String>();
            ArrayList<Token> tokens = new ArrayList<Token>();
            FSIterator tokenIter = jcas.getAnnotationIndex(Token.type).iterator();
            while (tokenIter.hasNext()) {
                Token token = (Token) tokenIter.next();
                tokenStrings.add(token.getCoveredText());
                tokens.add(token);
            }

            this.ttreader = new TreeTaggerReader(tokens, this.ttProc.getStdout(), jcas, this.annotate_sentences);
            this.ttwriter = new TreeTaggerWriter(tokenStrings, this.ttProc.getStdin());
            Thread rThread = new Thread(this.ttreader);
            Thread wThread = new Thread(this.ttwriter);
            rThread.start();
            wThread.start();
            rThread.join();
            wThread.join();
        }
        catch (IOException e) {
            e.printStackTrace();
        }
        catch (InterruptedException e) {
            // Do not swallow the interruption: restore the flag for the caller.
            Thread.currentThread().interrupt();
            e.printStackTrace();
        }
    }

    /*
     * Legacy file-based tagging path: writes the token texts to a temporary file, runs the
     * TreeTagger on that file and assigns one output line per token as its POS value,
     * optionally building Sentence annotations from end-of-sentence tags.
     *
     * This private method is not called from anywhere else in this class.
     *
     * Decompiler WARNING - Removed try catching itself - possible behaviour change.
     */
    private void doTreeTagOld(JCas jcas) {
        File tmpDocument = null;
        ArrayList<Token> tokens = new ArrayList<Token>();
        try {
            // Dump every non-empty token text to a temp file, each followed by ttprops.newLineSeparator.
            tmpDocument = File.createTempFile("postokens", null);
            BufferedWriter tmpFileWriter = new BufferedWriter(new OutputStreamWriter((OutputStream)new FileOutputStream(tmpDocument), "UTF-8"));
            for (Token t : jcas.getAnnotationIndex(Token.type)) {
                // Zero-length tokens are remembered in the list but not written to the file.
                tokens.add(t);
                if (t.getBegin() == t.getEnd()) continue;
                tmpFileWriter.write(t.getCoveredText() + this.ttprops.newLineSeparator);
            }
            // NOTE(review): the writer is not closed if a write above throws.
            tmpFileWriter.close();
        }
        catch (IOException e) {
            Logger.printError("Something went wrong creating a temporary file for the treetagger to process.");
            System.exit(-1);
        }
        // POS values that close the current sentence.
        HashSet<String> hsEndOfSentenceTag = new HashSet<String>();
        hsEndOfSentenceTag.add("SENT");
        hsEndOfSentenceTag.add("$.");
        hsEndOfSentenceTag.add("FS");
        hsEndOfSentenceTag.add("_Z_Fst");
        hsEndOfSentenceTag.add("_Z_Int");
        hsEndOfSentenceTag.add("_Z_Exc");
        hsEndOfSentenceTag.add("ew");
        try {
            Token token;
            Process p = this.ttprops.getTreeTaggingProcess(tmpDocument);
            Logger.printDetail(this.component, "TreeTagger (pos tagging) with: " + this.ttprops.parFileName);
            BufferedReader in = new BufferedReader(new InputStreamReader(p.getInputStream(), "UTF-8"));
            Sentence sentence = null;
            // Index of the next token in 'tokens' to pair with a tagger output line.
            int i = 0;
            String s = null;
            while ((s = in.readLine()) != null) {
                token = (Token)((Object)tokens.get(i++));
                // Skip over empty tokens (they were not written to the file). An EMPTYLINE
                // marker additionally closes the sentence in progress when sentences are annotated.
                while (token.getCoveredText().equals("")) {
                    if (this.annotate_sentences.booleanValue() && token.getPos() != null && token.getPos().equals("EMPTYLINE")) {
                        if (sentence == null) {
                            sentence = new Sentence(jcas);
                            sentence.setBegin(token.getBegin());
                        }
                        sentence.setEnd(token.getEnd());
                        // Only non-empty sentences are indexed.
                        if (sentence.getBegin() < sentence.getEnd()) {
                            sentence.addToIndexes();
                        }
                        sentence = null;
                    }
                    token.removeFromIndexes();
                    token = (Token)((Object)tokens.get(i++));
                }
                // Remove, set the POS to the tagger output line, and re-add the token.
                token.removeFromIndexes();
                if (!token.getCoveredText().equals("")) {
                    token.setPos(s);
                    token.addToIndexes();
                }
                if (!this.annotate_sentences.booleanValue()) continue;
                if (sentence == null) {
                    sentence = new Sentence(jcas);
                    sentence.setBegin(token.getBegin());
                }
                // Close the sentence on an end-of-sentence tag or once the token list is exhausted.
                if (!hsEndOfSentenceTag.contains(s) && i != tokens.size()) continue;
                sentence.setEnd(token.getEnd());
                sentence.addToIndexes();
                sentence = null;
            }
            // Handle tokens left over after the tagger output ended: close any open sentence at
            // the last token and drop remaining EMPTYLINE markers from the indexes.
            // NOTE(review): 'sentence' is not reset inside this loop, so an open sentence is
            // re-added on every iteration - confirm whether that is intended.
            while (i < tokens.size()) {
                if (sentence != null) {
                    sentence.setEnd(((Token)((Object)tokens.get(tokens.size() - 1))).getEnd());
                    sentence.addToIndexes();
                }
                if ((token = (Token)((Object)tokens.get(i++))).getPos() == null || !token.getPos().equals("EMPTYLINE")) continue;
                token.removeFromIndexes();
            }
            in.close();
            p.destroy();
        }
        catch (Exception e) {
            e.printStackTrace();
        }
        finally {
            // The temporary token file is removed whether or not tagging succeeded.
            tmpDocument.delete();
        }
    }

    /**
     * Sets the TreeTagger installation directory used as root path for all tagger files.
     *
     * @param home path of the TreeTagger installation directory
     */
    public void setHome(String home) {
        ttprops.rootPath = home;
    }

    /**
     * Repairs French sentence splits that were caused by an abbreviation such as "av." or
     * "apr." directly followed by "J.-C." (or a variant): the two sentences are replaced by
     * a single one spanning both. The scan is repeated until a pass merges nothing.
     *
     * @param jcas the document whose Sentence annotations are corrected
     */
    private void improveFrenchSentences(JCas jcas) {
        final String[] splitEndings = {"av.", "Av.", "apr.", "Apr.", "avant.", "Avant."};
        final String[] continuationStarts = {"J.-C.", "J-C.", "NSJC"};

        HashSet<Sentence> obsolete = new HashSet<Sentence>();
        HashSet<Sentence> replacements = new HashSet<Sentence>();

        boolean mergedSomething;
        do {
            mergedSomething = false;
            FSIterator sentenceIter = jcas.getAnnotationIndex(Sentence.type).iterator();
            while (sentenceIter.hasNext()) {
                Sentence first = (Sentence) sentenceIter.next();

                // Does the sentence end with one of the abbreviations that get split wrongly?
                String firstText = first.getCoveredText();
                boolean endsWithAbbreviation = false;
                for (String ending : splitEndings) {
                    if (firstText.endsWith(ending)) {
                        endsWithAbbreviation = true;
                        break;
                    }
                }
                if (!endsWithAbbreviation || !sentenceIter.hasNext()) {
                    continue;
                }

                // Peek at the following sentence, then step back so it is visited again
                // as the next "first" sentence.
                Sentence second = (Sentence) sentenceIter.next();
                sentenceIter.moveToPrevious();

                String secondText = second.getCoveredText();
                for (String start : continuationStarts) {
                    if (secondText.startsWith(start)) {
                        Sentence joined = new Sentence(jcas);
                        joined.setBegin(first.getBegin());
                        joined.setEnd(second.getEnd());
                        replacements.add(joined);
                        obsolete.add(first);
                        obsolete.add(second);
                        mergedSomething = true;
                        break;
                    }
                }
            }

            // Apply the changes only after the iteration over the index has finished.
            for (Sentence s : obsolete) {
                s.removeFromIndexes(jcas);
            }
            obsolete.clear();
            for (Sentence s : replacements) {
                s.addToIndexes(jcas);
            }
            replacements.clear();
        } while (mergedSomething);
    }

    /*
     * WARNING - void declaration
     */
    private void improveGermanSentences(JCas jcas) {
        HashSet<String[]> posRules = new HashSet<String[]>();
        posRules.add(new String[]{"CARD", "\\$.", "NN"});
        posRules.add(new String[]{"CARD", "\\$.", "NE"});
        FSIterator sentIter = jcas.getAnnotationIndex(Sentence.type).iterator();
        HashSet toMerge = new HashSet();
        Sentence prevSent = null;
        Sentence thisSent = null;
        block0: while (sentIter.hasNext()) {
            void var8_8;
            if (thisSent == null) {
                thisSent = (Sentence)((Object)sentIter.next());
                continue;
            }
            prevSent = thisSent;
            thisSent = (Sentence)((Object)sentIter.next());
            Token penultimateToken = null;
            Object var8_9 = null;
            Token firstToken = null;
            FSIterator tokIter = jcas.getAnnotationIndex(Token.type).subiterator((AnnotationFS)thisSent);
            if (tokIter.hasNext()) {
                firstToken = (Token)((Object)tokIter.next());
            }
            tokIter = jcas.getAnnotationIndex(Token.type).subiterator((AnnotationFS)prevSent);
            while (tokIter.hasNext()) {
                if (var8_8 == null) {
                    Token token = (Token)((Object)tokIter.next());
                    continue;
                }
                penultimateToken = var8_8;
                Token token = (Token)((Object)tokIter.next());
            }
            if (penultimateToken == null || var8_8 == null || firstToken == null) continue;
            for (String[] posRule : posRules) {
                if ((penultimateToken.getPos() == null || !penultimateToken.getPos().matches(posRule[0]) || var8_8.getPos() == null || !var8_8.getPos().matches(posRule[1]) || firstToken.getPos() == null || !firstToken.getPos().matches(posRule[2])) && !firstToken.getCoveredText().matches("^[a-z/].*")) continue;
                Boolean candidateExisted = false;
                for (HashSet hashSet : toMerge) {
                    if (!hashSet.contains((Object)thisSent) && !hashSet.contains((Object)prevSent)) continue;
                    hashSet.add(prevSent);
                    hashSet.add(thisSent);
                    candidateExisted = true;
                    break;
                }
                if (candidateExisted.booleanValue()) continue block0;
                HashSet<Sentence> newCandidate = new HashSet<Sentence>();
                newCandidate.add(prevSent);
                newCandidate.add(thisSent);
                toMerge.add(newCandidate);
                continue block0;
            }
        }
        for (HashSet hashSet : toMerge) {
            Integer beginIndex = Integer.MAX_VALUE;
            Integer endIndex = Integer.MIN_VALUE;
            Sentence mergedSent = new Sentence(jcas);
            for (Sentence s : hashSet) {
                if (s.getBegin() < beginIndex) {
                    beginIndex = s.getBegin();
                }
                if (s.getEnd() > endIndex) {
                    endIndex = s.getEnd();
                }
                s.removeFromIndexes();
            }
            mergedSent.setBegin(beginIndex);
            mergedSent.setEnd(endIndex);
            mergedSent.addToIndexes();
        }
    }

    /**
     * Shuts down the TreeTagger process, if one was started, and forgets it so that the
     * next tagging request starts a fresh process. Calling this method when no process is
     * running (never started, or already shut down) does nothing.
     */
    public void quit() {
        if (this.ttProc == null) {
            return;
        }
        this.ttProc.close();
        this.ttProc = null;
    }

    private class TreeTaggerContext
    extends RootUimaContext_impl {
        private ConfigurationManager mConfigManager = new ConfigurationManager_impl();

        public TreeTaggerContext(Language language, Boolean annotateTokens, Boolean annotateSentences, Boolean annotatePartOfSpeech, Boolean improveGermanSentences) {
            this(language, annotateTokens, annotateSentences, annotatePartOfSpeech, improveGermanSentences, null);
        }

        public TreeTaggerContext(Language language, Boolean annotateTokens, Boolean annotateSentences, Boolean annotatePartOfSpeech, Boolean improveGermanSentences, String cnTokPath) {
            this.initializeRoot(null, (ResourceManager)new ResourceManager_impl(), this.mConfigManager);
            this.mConfigManager.setSession(this.getSession());
            this.mConfigManager.setConfigParameterValue(this.makeQualifiedName(TreeTaggerWrapper.PARAM_LANGUAGE), (Object)language.getName());
            this.mConfigManager.setConfigParameterValue(this.makeQualifiedName(TreeTaggerWrapper.PARAM_ANNOTATE_TOKENS), (Object)annotateTokens);
            this.mConfigManager.setConfigParameterValue(this.makeQualifiedName(TreeTaggerWrapper.PARAM_ANNOTATE_PARTOFSPEECH), (Object)annotatePartOfSpeech);
            this.mConfigManager.setConfigParameterValue(this.makeQualifiedName(TreeTaggerWrapper.PARAM_ANNOTATE_SENTENCES), (Object)annotateSentences);
            this.mConfigManager.setConfigParameterValue(this.makeQualifiedName(TreeTaggerWrapper.PARAM_CHINESE_TOKENIZER_PATH), (Object)cnTokPath);
        }

        public ConfigurationManager getConfigurationManager() {
            return this.mConfigManager;
        }
    }
}

