/*
 * Decompiled with CFR 0.152.
 */
package hr.fer.zemris.takelab.uima.annotator.hunpos;

import de.unihd.dbs.uima.annotator.heideltime.resources.Language;
import de.unihd.dbs.uima.annotator.heideltime.utilities.Logger;
import de.unihd.dbs.uima.types.heideltime.Sentence;
import de.unihd.dbs.uima.types.heideltime.Token;
import hr.fer.zemris.takelab.splitter.TokenSplitter;
import hr.fer.zemris.takelab.uima.annotator.hunpos.HunPosAnnotionTranslator;
import java.io.BufferedInputStream;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Scanner;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.cas.FSIterator;
import org.apache.uima.cas.text.AnnotationIndex;
import org.apache.uima.impl.RootUimaContext_impl;
import org.apache.uima.jcas.JCas;
import org.apache.uima.resource.ConfigurationManager;
import org.apache.uima.resource.ResourceManager;
import org.apache.uima.resource.impl.ConfigurationManager_impl;
import org.apache.uima.resource.impl.ResourceManager_impl;

public class HunPosTaggerWrapper
extends JCasAnnotator_ImplBase {
    // Names of the UIMA configuration parameters read in initialize(UimaContext).

    // Language name; converted with Language.getLanguageFromString().
    public static final String PARAM_LANGUAGE = "language";
    // Directory containing the hunpos-tag executable; when null, the HUNPOS_HOME environment variable is used.
    public static final String PARAM_PATH = "hunpos_path";
    // Model file path; it is resolved relative to the HunPos directory.
    public static final String PARAM_MODEL_PATH = "model_path";
    // Boolean switch: create Token annotations with the token splitter.
    public static final String PARAM_ANNOTATE_TOKENS = "annotate_tokens";
    // Boolean switch: create Sentence annotations while POS tagging (only consulted when POS tagging runs).
    public static final String PARAM_ANNOTATE_SENTENCES = "annotate_sentences";
    // Boolean switch: run HunPos and store the translated tag on each Token.
    public static final String PARAM_ANNOTATE_POS = "annotate_pos";

    // Configured language; process() applies the Croatian sentence repair only for Language.CROATIAN.
    private Language language;
    // Values of the three Boolean switches above, unboxed in initialize(UimaContext).
    private boolean annotate_tokens;
    private boolean annotate_sentences;
    private boolean annotate_pos;

    /**
     * Programmatic initializer: packs the given settings into a {@code HunPosTaggerContext} and
     * delegates to {@link #initialize(UimaContext)}.
     *
     * @param language          language of the documents to process
     * @param hunpos_path       HunPos directory (may be {@code null}; see {@code HunPosWrapper.initialize})
     * @param hunpos_model_path model file path, resolved relative to the HunPos directory
     * @param annotateTokens    whether Token annotations should be created
     * @param annotateSentences whether Sentence annotations should be created
     * @param annotatePOS       whether part-of-speech tags should be assigned
     */
    public void initialize(Language language, String hunpos_path, String hunpos_model_path, Boolean annotateTokens, Boolean annotateSentences, Boolean annotatePOS) {
        UimaContext context = new HunPosTaggerContext(language, hunpos_path, hunpos_model_path, annotateTokens, annotateSentences, annotatePOS);
        initialize(context);
    }

    /**
     * Reads the annotator configuration from the given context and prepares the HunPos command line.
     *
     * <p>The three Boolean switches are unboxed directly, so a missing (null) value raises a
     * {@link NullPointerException}.
     *
     * @param aContext context supplying the {@code PARAM_*} configuration values
     */
    public void initialize(UimaContext aContext) {
        Boolean tokensSwitch = (Boolean) aContext.getConfigParameterValue(PARAM_ANNOTATE_TOKENS);
        annotate_tokens = tokensSwitch;

        Boolean sentencesSwitch = (Boolean) aContext.getConfigParameterValue(PARAM_ANNOTATE_SENTENCES);
        annotate_sentences = sentencesSwitch;

        Boolean posSwitch = (Boolean) aContext.getConfigParameterValue(PARAM_ANNOTATE_POS);
        annotate_pos = posSwitch;

        String languageName = (String) aContext.getConfigParameterValue(PARAM_LANGUAGE);
        language = Language.getLanguageFromString(languageName);

        String hunposDirectory = (String) aContext.getConfigParameterValue(PARAM_PATH);
        String modelLocation = (String) aContext.getConfigParameterValue(PARAM_MODEL_PATH);
        HunPosWrapper.initialize(modelLocation, hunposDirectory);
    }

    /**
     * Runs the configured steps on one document: tokenization, then POS tagging (optionally with
     * sentence detection), then the Croatian sentence-boundary repair when the language is Croatian.
     *
     * @param aJCas the document to annotate
     * @throws AnalysisEngineProcessException declared by the UIMA contract; not thrown directly here
     */
    public void process(JCas aJCas) throws AnalysisEngineProcessException {
        if (annotate_tokens) {
            TokenSplitterWrapper.splitTokens(aJCas);
        }

        if (annotate_pos) {
            HunPosWrapper.tagPOS(aJCas, annotate_sentences);
        }

        boolean isCroatian = Language.CROATIAN == language;
        if (isCroatian) {
            fixCroatianSentences(aJCas);
        }
    }

    /** Matches text that begins with one of the listed Croatian month-name forms. */
    private static final Pattern RE_BEGINS_WITH_MONTH = Pattern.compile("^(Sije\u010d(anj|nja)|Velja\u010d[ae]|O\u017euj(ak|ka)|Trav(anj|nja)|Svib(anj|nja)|Lip(anj|nja)|Srp(anj|nja)|Kolovoza?|Ruj(an|na)|Listopada?|Studen(i|og)|Prosin(ac|ca)).*");
    /** Matches text that begins with an uppercase letter (ASCII or one of five Croatian letters with diacritics). */
    private static final Pattern RE_BEGINS_WITH_UPPERCASE = Pattern.compile("^[A-Z\u0160\u0110\u010c\u0106\u017d].*");
    /** Matches text that ends with one to four digits followed by a period. */
    private static final Pattern RE_ENDS_WITH_DATE = Pattern.compile("(?s).*\\d{1,4}\\.$");
    /** Matches text that ends with "whitespace, one uppercase ASCII letter, period" or with one of : ; , % " ( ) - */
    private static final Pattern RE_FALSE_SENTENCE_END = Pattern.compile("(?s)^.*(\\s[A-Z]\\.|[:;,%\"\\(\\)\\-])$");

    /**
     * Repairs Sentence annotations that were split at a position that is not a real sentence end.
     *
     * <p>Walking the sentences in index order, the current sentence is merged into the previous one when
     * <ul>
     *   <li>the previous text matched {@link #RE_FALSE_SENTENCE_END}, or</li>
     *   <li>the previous text matched {@link #RE_ENDS_WITH_DATE} and the current text either does not
     *       start with an uppercase letter or starts with a month name.</li>
     * </ul>
     * Afterwards every original Sentence is removed from the indexes and the repaired set is added.
     *
     * <p>Both "previous" flags are recomputed from the text just handled on every iteration. The date
     * flag used to stay set until the next merge, so a single date-ending sentence could cause
     * unrelated later sentences to be merged.
     *
     * <p>NOTE(review): the two "begins with" patterns have no DOTALL flag, so they cannot match text
     * containing a line break - confirm whether that is intended.
     *
     * @param jCas the document whose Sentence annotations are repaired in place
     */
    private void fixCroatianSentences(JCas jCas) {
        FSIterator<?> sentenceIterator = jCas.getAnnotationIndex(Sentence.type).iterator();

        // Snapshot of the sentences currently indexed; they are all removed once the scan is complete.
        List<Sentence> originalSentences = new ArrayList<Sentence>();
        // Sentences that will be indexed afterwards (untouched originals plus merged replacements).
        HashSet<Sentence> repairedSentences = new HashSet<Sentence>();

        boolean prevIsDate = false;
        boolean prevIsFalseEnd = false;
        Sentence previous = null;

        while (sentenceIterator.hasNext()) {
            Object next = sentenceIterator.next();
            Sentence current = (Sentence) next;
            originalSentences.add(current);
            String text = current.getCoveredText();

            // Both flags are false on the first iteration, so 'previous' is non-null whenever we merge.
            boolean mergeWithPrevious = prevIsFalseEnd
                    || (prevIsDate
                        && (!RE_BEGINS_WITH_UPPERCASE.matcher(text).matches()
                            || RE_BEGINS_WITH_MONTH.matcher(text).matches()));

            if (mergeWithPrevious) {
                Sentence merged = new Sentence(jCas);
                merged.setBegin(previous.getBegin());
                merged.setEnd(current.getEnd());
                // The merged sentence replaces the previous one, which may itself be a merge result.
                repairedSentences.remove(previous);
                repairedSentences.add(merged);
                previous = merged;
                text = merged.getCoveredText();
            } else {
                repairedSentences.add(current);
                previous = current;
            }

            // Describe only the sentence just handled; stale state must not leak into later iterations.
            prevIsDate = RE_ENDS_WITH_DATE.matcher(text).matches();
            prevIsFalseEnd = RE_FALSE_SENTENCE_END.matcher(text).matches();
        }

        // Index updates happen only after iteration is finished.
        for (Sentence s : originalSentences) {
            s.removeFromIndexes(jCas);
        }
        for (Sentence s : repairedSentences) {
            s.addToIndexes(jCas);
        }
    }

    private class HunPosTaggerContext
    extends RootUimaContext_impl {
        public HunPosTaggerContext(Language language, String hunpos_path, String hunpos_model_path, Boolean annotateTokens, Boolean annotateSentences, Boolean annotatePartOfSpeech) {
            ConfigurationManager_impl configManager = new ConfigurationManager_impl();
            this.initializeRoot(null, (ResourceManager)new ResourceManager_impl(), (ConfigurationManager)configManager);
            configManager.setSession(this.getSession());
            configManager.setConfigParameterValue(this.makeQualifiedName(HunPosTaggerWrapper.PARAM_LANGUAGE), (Object)language.getName());
            configManager.setConfigParameterValue(this.makeQualifiedName(HunPosTaggerWrapper.PARAM_MODEL_PATH), (Object)hunpos_model_path);
            configManager.setConfigParameterValue(this.makeQualifiedName(HunPosTaggerWrapper.PARAM_PATH), (Object)hunpos_path);
            configManager.setConfigParameterValue(this.makeQualifiedName(HunPosTaggerWrapper.PARAM_ANNOTATE_TOKENS), (Object)annotateTokens);
            configManager.setConfigParameterValue(this.makeQualifiedName(HunPosTaggerWrapper.PARAM_ANNOTATE_POS), (Object)annotatePartOfSpeech);
            configManager.setConfigParameterValue(this.makeQualifiedName(HunPosTaggerWrapper.PARAM_ANNOTATE_SENTENCES), (Object)annotateSentences);
        }
    }

    private static class HunPosWrapper {
        private static List<String> command;
        public static final String HUNPOS_HOME = "HUNPOS_HOME";

        private HunPosWrapper() {
        }

        public static void initialize(String modelPath) {
            HunPosWrapper.initialize(modelPath, null);
        }

        public static void initialize(String modelPath, String hunposPath) {
            String hunposRoot = hunposPath;
            if (hunposRoot == null) {
                hunposRoot = System.getenv(HUNPOS_HOME);
            }
            if (hunposRoot == null || !new File(hunposRoot).exists()) {
                Logger.printError(HunPosWrapper.class, "The environment variable HUNPOS_HOME was not set, or set to \"" + hunposRoot + "\", which does not exist.");
                System.exit(-1);
            }
            File hunPosRootFile = new File(hunposRoot);
            command = new ArrayList<String>();
            command.add(hunposRoot + "/hunpos-tag");
            File modelFile = new File(hunPosRootFile, modelPath);
            if (modelFile.exists()) {
                command.add(modelFile.getAbsolutePath());
            } else {
                Logger.printError(HunPosWrapper.class, "The supplied model path " + modelPath + " does not exist.");
                System.exit(-1);
            }
        }

        public static void tagPOS(JCas jCas, boolean tagSentences) {
            Process p = null;
            String[] cmd = new String[command.size()];
            command.toArray(cmd);
            try {
                p = Runtime.getRuntime().exec(cmd);
            }
            catch (IOException e2) {
                Logger.printError(HunPosWrapper.class, "An error occured while trying to call HunPos at " + System.getenv(HUNPOS_HOME));
                e2.printStackTrace();
            }
            OutputStreamWriter writer = new OutputStreamWriter(p.getOutputStream());
            Logger.printDetail(HunPosWrapper.class, "Starting the POS tagging process.");
            ArrayList<Token> tokens = new ArrayList<Token>();
            for (Token t : jCas.getAnnotationIndex(Token.type)) {
                tokens.add(t);
            }
            class TaggingJob
            implements Runnable {
                private final Pattern HUNPOS_PATTERN = Pattern.compile("^(.+)\t([^\t]+)$");
                private JCas jCas;
                private List<Token> tokens;
                private boolean tagSentences;
                private InputStream input;
                private final String terminal = "Z";
                private HunPosAnnotionTranslator trans = new HunPosAnnotionTranslator();

                public TaggingJob(JCas jCas, List<Token> tokens, boolean tagSentences, InputStream input) {
                    this.jCas = jCas;
                    this.tokens = tokens;
                    this.tagSentences = tagSentences;
                    this.input = input;
                }

                @Override
                public void run() {
                    InputStreamReader ir = new InputStreamReader((InputStream)new BufferedInputStream(this.input), Charset.forName("UTF-8"));
                    Scanner scan = new Scanner(ir);
                    int i = 0;
                    String s = null;
                    Sentence sentence = null;
                    try {
                        while (scan.hasNextLine()) {
                            s = scan.nextLine().trim();
                            if (s.isEmpty()) continue;
                            Token token = this.tokens.get(i++);
                            while (token.getCoveredText().isEmpty()) {
                                token.setPos("");
                                token.addToIndexes();
                                token = this.tokens.get(i++);
                            }
                            Matcher m = this.HUNPOS_PATTERN.matcher(s);
                            if (m.find()) {
                                s = m.group(2);
                            } else {
                                --i;
                            }
                            token.removeFromIndexes();
                            token.setPos(this.trans.translate(s));
                            token.addToIndexes();
                            if (!this.tagSentences) continue;
                            if (sentence == null) {
                                sentence = new Sentence(this.jCas);
                                sentence.setBegin(token.getBegin());
                            }
                            if (!"Z".equals(s) && i != this.tokens.size()) continue;
                            sentence.setEnd(token.getEnd());
                            sentence.addToIndexes();
                            sentence = null;
                        }
                        scan.close();
                    }
                    catch (Exception e) {
                        e.printStackTrace();
                    }
                }
            }
            Thread thr = new Thread(new TaggingJob(jCas, tokens, tagSentences, p.getInputStream()));
            thr.start();
            for (Token t : tokens) {
                try {
                    writer.write(t.getCoveredText() + "\n");
                }
                catch (IOException e) {
                    e.printStackTrace();
                }
            }
            try {
                ((Writer)writer).close();
            }
            catch (IOException e) {
                e.printStackTrace();
            }
            try {
                thr.join();
                p.waitFor();
            }
            catch (InterruptedException e1) {
                e1.printStackTrace();
            }
        }
    }

    /**
     * Static helper that turns the output of {@code TokenSplitter} into Token annotations.
     */
    private static class TokenSplitterWrapper {
        private TokenSplitterWrapper() {
        }

        /**
         * Tokenizes the document text and adds one Token annotation per token to the indexes.
         *
         * <p>Each token is located by searching the document text forward from the end of the previous
         * token, so offsets are strictly non-decreasing.
         *
         * @param jcas document to tokenize
         * @throws RuntimeException if a token returned by the splitter cannot be found in the document text
         */
        public static void splitTokens(JCas jcas) {
            // Fetched once; it was previously requested again for every lookup.
            String documentText = jcas.getDocumentText();
            List<String> tokens = TokenSplitter.getTokens(documentText);
            int tokenOffset = 0;
            for (String token : tokens) {
                // Single search per token; the result serves both the check and the begin offset.
                int begin = documentText.indexOf(token, tokenOffset);
                if (begin < 0) {
                    throw new RuntimeException("Opps! Could not find token " + token + " in JCas after tokenizing with token splitter for Croatian. Hmm, there may exist a charset missmatch! Default encoding is " + Charset.defaultCharset().name() + " and should always be UTF-8.");
                }
                int end = begin + token.length();
                Token newToken = new Token(jcas);
                newToken.setBegin(begin);
                newToken.setEnd(end);
                newToken.addToIndexes();
                tokenOffset = end;
            }
        }
    }
}

