/*
 * Decompiled with CFR 0.152.
 */
package edu.stanford.nlp.pipeline;

import edu.stanford.nlp.io.IOUtils;
import edu.stanford.nlp.ling.CoreAnnotation;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.Annotator;
import edu.stanford.nlp.pipeline.POSTaggerAnnotator;
import edu.stanford.nlp.util.ArraySet;
import edu.stanford.nlp.util.CoreMap;
import edu.stanford.nlp.util.PropertiesUtils;
import edu.stanford.nlp.util.StringUtils;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Properties;
import java.util.Set;
import java.util.stream.Collectors;

public class MWTAnnotator
implements Annotator {
    private HashMap<String, List<String>> multiWordTokenMapping = new HashMap();
    private boolean useDictionary = false;
    private boolean preserveCasing;
    private Annotator statisticalMWTAnnotator;
    private HashMap<String, List<String>> statisticalMultiWordTokenMapping = new HashMap();
    private boolean useStatisticalModel = false;

    public MWTAnnotator(String name, Properties props) {
        String prefix;
        String string = prefix = name != null && !name.equals("") ? name + ".mwt." : "mwt.";
        if (!props.getProperty(prefix + "mappingFile", "").equals("")) {
            this.loadMultiWordTokenMappings(this.multiWordTokenMapping, props.getProperty(prefix + "mappingFile"));
            this.useDictionary = true;
        }
        if (!props.getProperty(prefix + "pos.model", "").equals("")) {
            this.useStatisticalModel = true;
            this.statisticalMWTAnnotator = new POSTaggerAnnotator("mwt.pos", props);
            this.loadMultiWordTokenMappings(this.statisticalMultiWordTokenMapping, props.getProperty(prefix + "statisticalMappingFile"));
        }
        this.preserveCasing = PropertiesUtils.getBool(props, prefix + "preserveCasing", true);
    }

    public void loadMultiWordTokenMappings(HashMap<String, List<String>> dictionary, String mapFilePath) {
        List<String> mapEntries = IOUtils.linesFromFile(mapFilePath);
        for (String mapEntry : mapEntries) {
            String originalWord = mapEntry.split("\t")[0].toLowerCase();
            List mwtWords = Arrays.asList(mapEntry.split("\t")[1].split(",")).stream().map(w -> w.toLowerCase()).collect(Collectors.toList());
            dictionary.put(originalWord, mwtWords);
        }
    }

    @Override
    public void annotate(Annotation annotation) {
        ArrayList<CoreLabel> finalDocumentTokens = new ArrayList<CoreLabel>();
        if (this.useStatisticalModel) {
            this.statisticalMWTAnnotator.annotate(annotation);
        }
        int sentNum = 0;
        for (CoreMap sentence : (List)annotation.get(CoreAnnotations.SentencesAnnotation.class)) {
            sentence.set(CoreAnnotations.TokenBeginAnnotation.class, finalDocumentTokens.size());
            ArrayList<CoreLabel> newSentenceTokens = new ArrayList<CoreLabel>();
            int sentenceIndex = 1;
            for (CoreLabel token : (List)sentence.get(CoreAnnotations.TokensAnnotation.class)) {
                String mwtTagKey;
                List<String> tokenWords = new ArrayList<String>();
                if (this.useStatisticalModel && this.statisticalMultiWordTokenMapping.containsKey(mwtTagKey = String.format("%s-%s", token.word().toLowerCase(), token.tag()).toLowerCase())) {
                    tokenWords = this.statisticalMultiWordTokenMapping.get(mwtTagKey).stream().collect(Collectors.toList());
                }
                if (this.useDictionary && this.multiWordTokenMapping.containsKey(token.word().toLowerCase())) {
                    tokenWords = this.multiWordTokenMapping.get(token.word().toLowerCase()).stream().collect(Collectors.toList());
                }
                if (tokenWords.size() > 1) {
                    if (this.preserveCasing) {
                        if (StringUtils.isAllUpperCase(token.word())) {
                            tokenWords = tokenWords.stream().map(t -> t.toUpperCase()).collect(Collectors.toList());
                        } else if (StringUtils.isTitleCase(token.word())) {
                            tokenWords.set(0, StringUtils.toTitleCase((String)tokenWords.get(0)));
                        }
                    }
                    boolean isFirst = true;
                    for (String word : tokenWords) {
                        CoreLabel newToken = new CoreLabel();
                        newToken.setWord(word);
                        newToken.setValue(word);
                        newToken.setOriginalText(word);
                        newToken.setIsNewline(false);
                        if (token.keySet().contains(CoreAnnotations.ParentAnnotation.class)) {
                            newToken.set(CoreAnnotations.ParentAnnotation.class, token.get(CoreAnnotations.ParentAnnotation.class));
                        }
                        newToken.set(CoreAnnotations.TokenBeginAnnotation.class, finalDocumentTokens.size());
                        newToken.set(CoreAnnotations.TokenEndAnnotation.class, finalDocumentTokens.size() + 1);
                        newToken.setBeginPosition(token.beginPosition());
                        newToken.setEndPosition(token.endPosition());
                        newToken.setBefore(token.before());
                        newToken.setAfter(token.after());
                        newToken.set(CoreAnnotations.MWTTokenTextAnnotation.class, token.word());
                        newToken.setIsMWT(true);
                        if (isFirst) {
                            newToken.setIsMWTFirst(true);
                            isFirst = false;
                        } else {
                            newToken.setIsMWTFirst(false);
                        }
                        newToken.setIndex(sentenceIndex);
                        newToken.setSentIndex(sentNum);
                        newSentenceTokens.add(newToken);
                        finalDocumentTokens.add(newToken);
                        ++sentenceIndex;
                    }
                    continue;
                }
                CoreLabel newToken = new CoreLabel(token);
                newToken.set(CoreAnnotations.TokenBeginAnnotation.class, finalDocumentTokens.size());
                newToken.set(CoreAnnotations.TokenEndAnnotation.class, finalDocumentTokens.size() + 1);
                newToken.setIndex(sentenceIndex);
                newToken.setIsMWT(false);
                newToken.setIsMWTFirst(false);
                newSentenceTokens.add(newToken);
                finalDocumentTokens.add(newToken);
                ++sentenceIndex;
            }
            sentence.set(CoreAnnotations.TokenEndAnnotation.class, finalDocumentTokens.size());
            sentence.set(CoreAnnotations.TokensAnnotation.class, newSentenceTokens);
            ++sentNum;
        }
        annotation.set(CoreAnnotations.TokensAnnotation.class, finalDocumentTokens);
        if (this.useStatisticalModel) {
            for (CoreLabel token : (List)annotation.get(CoreAnnotations.TokensAnnotation.class)) {
                token.remove(CoreAnnotations.PartOfSpeechAnnotation.class);
            }
        }
    }

    @Override
    public Set<Class<? extends CoreAnnotation>> requires() {
        return Collections.unmodifiableSet(new ArraySet<Class>(Arrays.asList(CoreAnnotations.TokensAnnotation.class, CoreAnnotations.CharacterOffsetBeginAnnotation.class, CoreAnnotations.CharacterOffsetEndAnnotation.class, CoreAnnotations.SentencesAnnotation.class)));
    }

    @Override
    public Set<Class<? extends CoreAnnotation>> requirementsSatisfied() {
        return Collections.unmodifiableSet(new ArraySet<Class>(Arrays.asList(CoreAnnotations.MWTTokenTextAnnotation.class, CoreAnnotations.IsMultiWordTokenAnnotation.class)));
    }
}

