/*
 * Decompiled with CFR 0.152.
 */
package de.unihd.dbs.uima.annotator.stanfordtagger;

import de.unihd.dbs.uima.annotator.heideltime.utilities.Logger;
import de.unihd.dbs.uima.types.heideltime.Sentence;
import de.unihd.dbs.uima.types.heideltime.Token;
import edu.stanford.nlp.ling.HasWord;
import edu.stanford.nlp.ling.TaggedWord;
import edu.stanford.nlp.process.PTBTokenizer;
import edu.stanford.nlp.process.TokenizerFactory;
import edu.stanford.nlp.tagger.maxent.MaxentTagger;
import edu.stanford.nlp.tagger.maxent.TaggerConfig;
import java.io.FileInputStream;
import java.io.Reader;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.List;
import java.util.ListIterator;
import java.util.Properties;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.jcas.JCas;

public class StanfordPOSTaggerWrapper
extends JCasAnnotator_ImplBase {
    private Class<?> component = ((Object)((Object)this)).getClass();
    public static final String PARAM_MODEL_PATH = "model_path";
    public static final String PARAM_CONFIG_PATH = "config_path";
    public static final String PARAM_ANNOTATE_TOKENS = "annotate_tokens";
    public static final String PARAM_ANNOTATE_SENTENCES = "annotate_sentences";
    public static final String PARAM_ANNOTATE_PARTOFSPEECH = "annotate_partofspeech";
    private String model_path;
    private String config_path;
    private Boolean annotate_tokens = false;
    private Boolean annotate_sentences = false;
    private Boolean annotate_partofspeech = false;
    private MaxentTagger mt;

    public void initialize(UimaContext aContext) {
        this.annotate_tokens = (Boolean)aContext.getConfigParameterValue(PARAM_ANNOTATE_TOKENS);
        this.annotate_sentences = (Boolean)aContext.getConfigParameterValue(PARAM_ANNOTATE_SENTENCES);
        this.annotate_partofspeech = (Boolean)aContext.getConfigParameterValue(PARAM_ANNOTATE_PARTOFSPEECH);
        this.model_path = (String)aContext.getConfigParameterValue(PARAM_MODEL_PATH);
        this.config_path = (String)aContext.getConfigParameterValue(PARAM_CONFIG_PATH);
        if (this.model_path == null) {
            Logger.printError(this.component, "The model file for the Stanford Tagger was not correctly specified.");
            System.exit(-1);
        }
        try {
            if (this.config_path != null) {
                FileInputStream isr = new FileInputStream(this.config_path);
                Properties props = new Properties();
                props.load(isr);
                this.mt = new MaxentTagger(this.model_path, (Properties)new TaggerConfig(props), false);
            } else {
                this.mt = new MaxentTagger(this.model_path, (Properties)new TaggerConfig(new String[]{"-model", this.model_path}), false);
            }
        }
        catch (Exception e) {
            e.printStackTrace();
            Logger.printError(this.component, "MaxentTagger could not be instantiated with the supplied model(" + this.model_path + ") and config(" + this.config_path + ") file.");
            System.exit(-1);
        }
    }

    public void process(JCas jcas) throws AnalysisEngineProcessException {
        Integer offset = 0;
        String docText = jcas.getDocumentText();
        TokenizerFactory fac = PTBTokenizer.PTBTokenizerFactory.newTokenizerFactory();
        fac.setOptions("ptb3Escaping=false,untokenizable=noneKeep");
        List tokenArray = MaxentTagger.tokenizeText((Reader)new StringReader(docText), (TokenizerFactory)fac);
        for (List sentenceToken : tokenArray) {
            ArrayList taggedSentence = this.mt.tagSentence(sentenceToken);
            ListIterator twit = taggedSentence.listIterator();
            Sentence sentence = new Sentence(jcas);
            sentence.setBegin(offset);
            Integer wordCount = 0;
            for (HasWord wordToken : sentenceToken) {
                String thisWord;
                Token t = new Token(jcas);
                TaggedWord tw = (TaggedWord)twit.next();
                if (this.annotate_partofspeech.booleanValue()) {
                    t.setPos(tw.tag());
                }
                if (docText.indexOf(thisWord = wordToken.word(), (int)offset) < 0) {
                    Logger.printDetail(this.component, "A previously tagged token wasn't found in the document text: \"" + thisWord + "\". This may be due to unpredictable punctuation tokenization; hence this token isn't tagged.");
                    continue;
                }
                offset = docText.indexOf(thisWord, (int)offset);
                t.setBegin(offset);
                wordCount = wordCount + 1;
                offset = offset + thisWord.length();
                t.setEnd(offset);
                if (!this.annotate_tokens.booleanValue()) continue;
                t.addToIndexes();
            }
            if (!this.annotate_sentences.booleanValue()) continue;
            if (wordCount == 0) {
                sentence.setEnd(offset);
            } else {
                sentence.setEnd(offset - 1);
            }
            sentence.addToIndexes();
        }
        for (Sentence s : jcas.getAnnotationIndex(Sentence.type)) {
            if (s.getBegin() >= 0 && s.getEnd() >= 0) continue;
            System.err.println("Sentence: " + s.getBegin() + ":" + s.getEnd() + " = " + s.getCoveredText());
            System.err.println("wrong index in text: " + jcas.getDocumentText());
            System.exit(-1);
        }
        for (Token t : jcas.getAnnotationIndex(Token.type)) {
            if (t.getBegin() >= 0 && t.getEnd() >= 0) continue;
            System.err.println("In text: " + jcas.getDocumentText());
            System.err.println("Token: " + t.getBegin() + ":" + t.getEnd());
            System.exit(-1);
        }
    }
}

