/*
 * Decompiled with CFR 0.152.
 */
package de.unihd.dbs.uima.annotator.jvntextprowrapper;

import de.unihd.dbs.uima.annotator.heideltime.utilities.Logger;
import de.unihd.dbs.uima.types.heideltime.Sentence;
import de.unihd.dbs.uima.types.heideltime.Token;
import java.io.File;
import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;
import jmaxent.Classification;
import jvnpostag.POSContextGenerator;
import jvnpostag.POSDataReader;
import jvnsegmenter.CRFSegmenter;
import jvnsensegmenter.JVnSenSegmenter;
import jvntextpro.JVnTextPro;
import jvntextpro.conversion.CompositeUnicode2Unicode;
import jvntextpro.data.DataReader;
import jvntextpro.data.TWord;
import jvntextpro.data.TaggingData;
import jvntextpro.util.StringUtils;
import jvntokenizer.PennTokenizer;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.jcas.JCas;

/**
 * UIMA annotator that runs the JVnTextPro processing chain (sentence
 * segmentation, tokenization, word segmentation and POS tagging) on the
 * document text of a {@link JCas} and stores the results as HeidelTime
 * {@link Sentence} and {@link Token} annotations.
 *
 * <p>Which annotations are produced is controlled by the three boolean
 * configuration parameters; the model locations are given by the three path
 * parameters. All annotation offsets refer to the original document text.
 */
public class JVnTextProWrapper
extends JCasAnnotator_ImplBase {
    /** Class handed to {@link Logger} so that messages are attributed to this component. */
    private Class<?> component = this.getClass();

    /** Configuration parameter: path of the sentence segmenter model. */
    public static final String PARAM_SENTSEGMODEL_PATH = "sent_model_path";
    /** Configuration parameter: path of the word segmenter model. */
    public static final String PARAM_WORDSEGMODEL_PATH = "word_model_path";
    /** Configuration parameter: directory of the POS tagging model. */
    public static final String PARAM_POSMODEL_PATH = "pos_model_path";
    /** Configuration parameter: whether {@link Token} annotations are created. */
    public static final String PARAM_ANNOTATE_TOKENS = "annotate_tokens";
    /** Configuration parameter: whether {@link Sentence} annotations are created. */
    public static final String PARAM_ANNOTATE_SENTENCES = "annotate_sentences";
    /** Configuration parameter: whether tokens receive a part-of-speech tag. */
    public static final String PARAM_ANNOTATE_PARTOFSPEECH = "annotate_partofspeech";

    private boolean annotate_tokens = false;
    private boolean annotate_sentences = false;
    private boolean annotate_partofspeech = false;
    private String sentModelPath = null;
    private String wordModelPath = null;
    private String posModelPath = null;

    JVnSenSegmenter vnSenSegmenter = new JVnSenSegmenter();
    CRFSegmenter vnSegmenter = new CRFSegmenter();
    DataReader reader = new POSDataReader();
    TaggingData dataTagger = new TaggingData();
    /** POS classifier; stays {@code null} when no POS model path is configured. */
    Classification classifier = null;

    /**
     * Reads the configuration parameters and loads every model for which a
     * path was configured.
     *
     * <p>Boolean parameters that are not set are treated as {@code false}.
     * If a configured model cannot be loaded, the error is logged and the JVM
     * is terminated via {@code System.exit(-1)}.
     *
     * @param aContext UIMA context providing the configuration parameter values
     */
    public void initialize(UimaContext aContext) {
        this.annotate_tokens = JVnTextProWrapper.readFlag(aContext, PARAM_ANNOTATE_TOKENS);
        this.annotate_sentences = JVnTextProWrapper.readFlag(aContext, PARAM_ANNOTATE_SENTENCES);
        this.annotate_partofspeech = JVnTextProWrapper.readFlag(aContext, PARAM_ANNOTATE_PARTOFSPEECH);
        this.sentModelPath = (String)aContext.getConfigParameterValue(PARAM_SENTSEGMODEL_PATH);
        this.wordModelPath = (String)aContext.getConfigParameterValue(PARAM_WORDSEGMODEL_PATH);
        this.posModelPath = (String)aContext.getConfigParameterValue(PARAM_POSMODEL_PATH);

        if (this.sentModelPath != null && !this.vnSenSegmenter.init(this.sentModelPath)) {
            this.abortInitialization("Error initializing the sentence segmenter model: " + this.sentModelPath, null);
        }
        if (this.wordModelPath != null) {
            try {
                this.vnSegmenter.init(this.wordModelPath);
            }
            catch (Exception e) {
                this.abortInitialization("Error initializing the word segmenter model: " + this.wordModelPath, e);
            }
        }
        if (this.posModelPath != null) {
            try {
                this.dataTagger.addContextGenerator(new POSContextGenerator(this.posModelPath + File.separator + "featuretemplate.xml"));
                this.classifier = new Classification(this.posModelPath);
            }
            catch (Exception e) {
                this.abortInitialization("Error initializing the POS tagging model: " + this.posModelPath, e);
            }
        }
    }

    /**
     * Reads a boolean configuration parameter, mapping an unset parameter to
     * {@code false} instead of letting a {@code null} value reach unboxing.
     */
    private static boolean readFlag(UimaContext aContext, String parameterName) {
        Boolean value = (Boolean)aContext.getConfigParameterValue(parameterName);
        return value != null && value.booleanValue();
    }

    /**
     * Logs an initialization failure, including its cause when one is known,
     * and terminates the JVM with exit status -1.
     */
    private void abortInitialization(String message, Exception cause) {
        Logger.printError(this.component, message);
        if (cause != null) {
            // Keep the underlying reason visible instead of silently dropping it.
            Logger.printError(this.component, "Cause: " + cause);
        }
        System.exit(-1);
    }

    /**
     * Segments the document text and adds the configured annotations to the CAS.
     *
     * <p>Nothing is done when neither sentences nor tokens are requested or
     * when the CAS has no (or only empty) document text. The word
     * segmentation and tagging steps are only run when tokens are requested.
     *
     * @param jcas CAS whose document text is processed and which receives the annotations
     * @throws AnalysisEngineProcessException declared by the overridden method; not thrown directly here
     */
    public void process(JCas jcas) throws AnalysisEngineProcessException {
        if (!this.annotate_sentences && !this.annotate_tokens) {
            return;
        }
        String origText = jcas.getDocumentText();
        if (origText == null || origText.isEmpty()) {
            return;
        }

        String convertedText = new CompositeUnicode2Unicode().convert(origText);
        String senSegmentedText = this.vnSenSegmenter.senSegment(convertedText).trim();

        if (this.annotate_sentences) {
            this.annotateSentences(jcas, origText, senSegmentedText);
        }
        if (this.annotate_tokens) {
            String tokenizedText = PennTokenizer.tokenize(senSegmentedText).trim();
            String segmentedText = this.vnSegmenter.segmenting(tokenizedText);
            String postProcessedString = new JVnTextPro().postProcessing(segmentedText).trim();
            this.annotateTokens(jcas, origText, postProcessedString);
        }
    }

    /**
     * Creates one {@link Sentence} annotation per line of the segmented text
     * by locating each line in the original text, searching forward from the
     * end of the previously matched sentence.
     */
    private void annotateSentences(JCas jcas, String origText, String senSegmentedText) {
        int offset = 0;
        for (String line : senSegmentedText.split("\n")) {
            String sentence = line.trim();
            if (sentence.isEmpty()) {
                // indexOf("") always succeeds and would yield a zero-length annotation.
                continue;
            }
            int begin = origText.indexOf(sentence, offset);
            if (begin < 0) {
                // Second attempt without the last character of the segmenter output.
                sentence = sentence.substring(0, sentence.length() - 1).trim();
                begin = sentence.isEmpty() ? -1 : origText.indexOf(sentence, offset);
            }
            if (begin < 0) {
                System.err.println("Sentence \"" + sentence + "\" was not found in the original text.");
                continue;
            }
            Sentence s = new Sentence(jcas);
            s.setBegin(begin);
            offset = begin + sentence.length();
            s.setEnd(offset);
            s.addToIndexes();
        }
    }

    /**
     * Creates one {@link Token} annotation per whitespace-separated item of
     * the post-processed text and, if POS annotation is enabled, assigns the
     * tag of the tagged word at the same position.
     *
     * <p>A token that cannot be found verbatim is searched for again with its
     * underscores replaced by spaces and with its underscores removed; the
     * variant occurring first in the original text wins, the spaced variant
     * on a tie.
     */
    private void annotateTokens(JCas jcas, String origText, String postProcessedString) {
        List<TWord> posWords = this.collectPosWords(postProcessedString);
        String[] tokens = postProcessedString.split("\\s+");
        int offset = 0;
        for (int i = 0; i < tokens.length; ++i) {
            String token = tokens[i].trim();
            if (token.isEmpty()) {
                continue;
            }

            String thisPosTag = null;
            if (i < posWords.size()) {
                TWord posWord = posWords.get(i);
                if (token.equals(posWord.getWord())) {
                    thisPosTag = posWord.getTag();
                } else {
                    System.err.println("Couldn't match token: " + token + " to expected word/tag combination " + posWord.getWord());
                }
            }

            String surface = token;
            int begin = origText.indexOf(token, offset);
            if (begin < 0) {
                String spaced = token.replace('_', ' ');
                String joined = token.replace("_", "");
                int spacedAt = origText.indexOf(spaced, offset);
                // An all-underscore token collapses to "", which indexOf would always "find".
                int joinedAt = joined.isEmpty() ? -1 : origText.indexOf(joined, offset);
                if (spacedAt >= 0 && (joinedAt < 0 || spacedAt <= joinedAt)) {
                    surface = spaced;
                    begin = spacedAt;
                } else if (joinedAt >= 0) {
                    surface = joined;
                    begin = joinedAt;
                }
            }
            if (begin < 0) {
                System.err.println("Token \"" + token + "\" was not found in the original text.");
                continue;
            }

            Token t = new Token(jcas);
            t.setBegin(begin);
            offset = begin + surface.length();
            t.setEnd(offset);
            this.sanitizeToken(t, jcas);
            if (this.annotate_partofspeech) {
                t.setPos(thisPosTag);
            }
            t.addToIndexes();
        }
    }

    /**
     * Tags the text and flattens the tagged sentences into one list of words.
     *
     * @return the tagged words in order, or an empty list when POS annotation
     *         is disabled or no POS model has been loaded
     */
    private List<TWord> collectPosWords(String postProcessedString) {
        List<TWord> posWords = new ArrayList<TWord>();
        if (!this.annotate_partofspeech || this.classifier == null) {
            return posWords;
        }
        for (jvntextpro.data.Sentence sent : this.jvnTagging(postProcessedString)) {
            for (int i = 0; i < sent.size(); ++i) {
                posWords.add(sent.getTWordAt(i));
            }
        }
        return posWords;
    }

    /**
     * Repeatedly splits a leading and/or trailing punctuation character off a
     * token that is longer than one character. Each removed character becomes
     * its own one-character token.
     *
     * @param t token whose begin/end offsets are narrowed in place
     * @param jcas CAS in which the punctuation tokens are created
     * @return {@code true} if at least one character was split off
     */
    private Boolean sanitizeToken(Token t, JCas jcas) {
        boolean changedAny = false;
        boolean changed;
        do {
            changed = false;
            String covered = t.getCoveredText();
            if (covered.length() > 1 && covered.matches("^\\p{Punct}.*")) {
                int begin = t.getBegin();
                this.addPunctuationToken(jcas, begin, covered.charAt(0));
                t.setBegin(begin + 1);
                changed = true;
            }
            // Re-read the text: the leading split may have shortened the token.
            covered = t.getCoveredText();
            if (covered.length() > 1 && covered.matches(".*\\p{Punct}$")) {
                int end = t.getEnd();
                this.addPunctuationToken(jcas, end - 1, covered.charAt(covered.length() - 1));
                t.setEnd(end - 1);
                changed = true;
            }
            changedAny |= changed;
        } while (changed);
        return changedAny;
    }

    /**
     * Creates a one-character token at {@code begin}. The character itself is
     * used as POS tag when POS annotation is enabled, and the token is indexed
     * when token annotation is enabled.
     */
    private void addPunctuationToken(JCas jcas, int begin, char symbol) {
        Token puncToken = new Token(jcas);
        puncToken.setBegin(begin);
        puncToken.setEnd(begin + 1);
        if (this.annotate_partofspeech) {
            puncToken.setPos(String.valueOf(symbol));
        }
        if (this.annotate_tokens) {
            puncToken.addToIndexes();
        }
    }

    /**
     * Reads the given text into sentences and assigns a POS label to every
     * word. A label of "Mrk" (case-insensitive) is replaced by the word itself
     * if the word is punctuation, otherwise by "X".
     *
     * @param instr text to tag
     * @return the sentences read from {@code instr}, with tags set on their words
     * @throws IllegalStateException if no POS model has been loaded
     */
    public List<jvntextpro.data.Sentence> jvnTagging(String instr) {
        if (this.classifier == null) {
            throw new IllegalStateException("POS tagging requested but no POS model is loaded (parameter " + PARAM_POSMODEL_PATH + ")");
        }
        List<jvntextpro.data.Sentence> data = this.reader.readString(instr);
        for (jvntextpro.data.Sentence sent : data) {
            for (int j = 0; j < sent.size(); ++j) {
                String[] cps = this.dataTagger.getContext(sent, j);
                String label = this.classifier.classify(cps);
                if (label.equalsIgnoreCase("Mrk")) {
                    String word = sent.getWordAt(j);
                    label = StringUtils.isPunc(word) ? word : "X";
                }
                sent.getTWordAt(j).setTag(label);
            }
        }
        return data;
    }
}

