/*
 * Decompiled with CFR 0.152.
 */
package eu.fbk.dh.kd.lib;

import eu.fbk.dh.kd.lib.KD_configuration;
import eu.fbk.dh.kd.lib.KD_core;
import eu.fbk.dh.kd.lib.KD_keyconcept;
import eu.fbk.dh.kd.lib.KD_utils;
import eu.fbk.dh.kd.lib.Resultset_Record;
import eu.fbk.dh.stemmer.snowball.SnowballStemmer;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.Callable;
import java.util.concurrent.ConcurrentHashMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.lang3.ArrayUtils;
import org.apache.commons.lang3.text.WordUtils;
import org.mapdb.DB;
import org.mapdb.HTreeMap;
import org.mapdb.Serializer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Extracts candidate key-concepts from one partition of tagged tokens.
 *
 * <p>The extractor slides a set of part-of-speech patterns (read from the language pack's
 * {@code patterns.txt}) over the token stream. Every time a pattern is matched completely, the
 * matched expression is stored (or its frequency incremented) in the result map. Afterwards
 * blacklisted expressions are removed and each surviving concept receives its IDF weight and its
 * "abstract noun" score boost.
 *
 * <p>Side effects visible to callers: the {@code token} and {@code lemma} fields of every record
 * that matches at least one pattern position are case-normalized in place, and the process is
 * terminated with {@code System.exit(1)} when the pattern file cannot be read or when grouping by
 * stem is requested without a usable stemmer.
 */
class KD_extractor implements Callable<Map<String, KD_keyconcept>> {
    private static final Logger LOGGER = LoggerFactory.getLogger(KD_extractor.class);

    /** Marker placed in front of every element appended to an expression key. */
    private static final String SEPARATOR = "\u00f7\u2022\u00f7";

    /** Lemma value that is treated as "no lemma available". */
    private static final String MISSING_LEMMA = "__NULL__";

    /** A token containing any of these characters interrupts every pattern in progress. */
    private static final Pattern FORBIDDEN_CHARS = Pattern.compile("[\\(\\)\\{\\}\\[\\],;*/&!%^|\\+<>\u2190\"\u03bb\u03c0\u03c8\u03b9\u03b3\u03c6\u2217\u03b2\ufb04\u2192\u03b4.:\u03c2\u02c6~\u02dc\u03b8\u2208[0-9]]");

    /** Suffixes used to flag an element as abstract; applied to every language except Italian. */
    private static final Pattern EN_ABSTRACT_SUFFIX = Pattern.compile(".*(tion|ism|ity|ment|ness|age|ance|ence|ship|ability|acy)$");

    /** Suffixes used to flag an element as abstract when the language is Italian. */
    private static final Pattern IT_ABSTRACT_SUFFIX = Pattern.compile(".*(ismo|esimo|eria|ezza|izia|it\u00e0|et\u00e0|itudine|anza|enza)$");

    private List<Resultset_Record> records;
    private final KD_core.Language lang;
    private final Integer max_keyword_lenght;
    private final Double max_idf_value;
    KD_configuration.Group lemmatization;
    private final boolean skip_proper_noun;
    private final boolean skip_keyword_with_propernoun;
    private final boolean no_idf;
    private final boolean no_abstract;
    private final boolean verbose;
    private final boolean skip_keyword_with_not_allowed_words;
    private final boolean use_pattern_weight;
    private final KD_configuration.Tagset tagset;
    private final String languagePath;
    private final boolean capitalize_pos;
    DB languageDB = null;
    private SnowballStemmer stemmer = null;

    /**
     * Creates an extractor for one partition of the token stream.
     *
     * @param name          unused by this class; kept for signature compatibility
     * @param partition     the records to scan, in text order
     * @param l             language of the text; selects the stemmer class and the suffix list
     * @param max_idf_value value used to scale the IDF weight of each concept
     * @param conf          extraction settings, copied into this instance
     * @param langDB        MapDB database holding the language lists read by {@link #call()}
     */
    public KD_extractor(String name, List<Resultset_Record> partition, KD_core.Language l, Double max_idf_value, KD_configuration conf, DB langDB) {
        this.records = partition;
        this.lang = l;
        this.max_keyword_lenght = conf.max_keyword_length;
        this.max_idf_value = max_idf_value;
        this.lemmatization = conf.group_by;
        this.skip_proper_noun = conf.skip_proper_noun;
        this.skip_keyword_with_propernoun = conf.skip_keyword_with_proper_noun;
        this.use_pattern_weight = conf.use_pattern_weight;
        this.no_idf = conf.no_idf;
        this.no_abstract = conf.no_abstract;
        this.languagePath = conf.languagePackPath;
        this.verbose = conf.verbose;
        this.tagset = conf.tagset;
        this.capitalize_pos = conf.capitalize_pos;
        this.skip_keyword_with_not_allowed_words = conf.skip_keyword_with_not_allowed_words;
        this.languageDB = langDB;
        try {
            // Locale.ROOT keeps the class name stable whatever the JVM default locale is.
            String stemmerClass = "eu.fbk.dh.stemmer.snowball.ext." + this.lang.toString().toLowerCase(Locale.ROOT) + "Stemmer";
            this.stemmer = (SnowballStemmer) Class.forName(stemmerClass).getDeclaredConstructor().newInstance();
        } catch (ReflectiveOperationException | RuntimeException e) {
            // A missing stemmer is only fatal later, and only when grouping by stem.
            LOGGER.info("ATTENTION!! Invalid stemmer for language " + this.lang);
        }
    }

    /**
     * Runs the extraction over the partition given to the constructor.
     *
     * @return the collected concepts, keyed by expression key
     * @throws NumberFormatException if pattern weights are enabled and a pattern line does not end
     *                               with a parsable number
     */
    @Override
    public Map<String, KD_keyconcept> call() {
        Map<String, Double> lemmaBlacklist = this.openMap("lemma_blacklist");
        Map<String, Double> lemmata = this.openMap("lemmata");
        Map<String, Double> posBlacklist = this.openMap("pos_blacklist");
        Map<String, Double> properNounPos = this.openMap("properNounPos_blacklist");
        Map<String, Double> whitelist = this.openMap("whitelist");

        Set<String> capitalizedPos = this.loadCapitalizedPos();
        List<PosChain> chains = this.loadPatterns();

        Map<String, KD_keyconcept> collected = new ConcurrentHashMap<String, KD_keyconcept>();
        // Last record that went through pattern matching; null when the previous record was
        // filtered out (in which case every chain has already been reset).
        Resultset_Record previous = null;
        for (Resultset_Record record : this.records) {
            // A pattern never spans two sentences.
            if (previous != null && previous.sentence_id.intValue() != record.sentence_id.intValue()) {
                KD_extractor.resetAll(chains);
            }
            previous = null;

            String pos = record.pos;
            if (record.token.length() <= 1 || posBlacklist.containsKey(pos) || FORBIDDEN_CHARS.matcher(record.token).find()) {
                KD_extractor.resetAll(chains);
                continue;
            }

            // Case normalization, lemma and stem depend only on the record, so they are computed
            // once, and only if at least one chain accepts the record.
            boolean prepared = false;
            String lemma = null;
            String stem = null;
            for (PosChain chain : chains) {
                if (!pos.startsWith(chain.expectedTag())) {
                    chain.reset();
                    continue;
                }
                if (!prepared) {
                    this.normalizeCase(record, capitalizedPos);
                    lemma = KD_extractor.usableLemma(record);
                    stem = this.stem(record.token);
                    prepared = true;
                }

                switch (this.lemmatization) {
                    case ALL_LEMMA: {
                        chain.key.append(SEPARATOR).append(lemma);
                        break;
                    }
                    case BY_LIST: {
                        // Elements whose lemma is not in the list contribute nothing to the key.
                        if (lemmata.containsKey(record.lemma)) {
                            chain.key.append(SEPARATOR).append(lemma);
                        }
                        break;
                    }
                    case BY_STEM: {
                        if (this.stemmer == null) {
                            LOGGER.error("invalid stemmer");
                            System.exit(1);
                        }
                        chain.key.append(SEPARATOR).append(stem);
                        break;
                    }
                    default: {
                        chain.key.append(SEPARATOR).append(record.token);
                    }
                }
                chain.tokens.append(SEPARATOR).append(record.token);
                chain.lemmas.append(SEPARATOR).append(lemma);
                chain.stems.append(SEPARATOR).append(stem);
                chain.remaining--;
                if (chain.remaining != 0) {
                    continue;
                }

                // The pattern is complete: decide whether the expression is kept.
                boolean properNoun = (this.skip_proper_noun || this.skip_keyword_with_propernoun)
                        && properNounPos.containsKey(pos.toLowerCase());
                boolean suspicious = this.skip_keyword_with_propernoun && properNoun
                        || chain.tags.length == 1
                                && (lemmaBlacklist.containsKey(record.lemma)
                                        || this.skip_proper_noun && properNoun
                                        || record.token.endsWith("."));
                if (!suspicious || whitelist.containsKey(record.token.toLowerCase().trim())) {
                    String key = chain.key.toString();
                    switch (this.lemmatization) {
                        case ALL_LEMMA: {
                            key = chain.lemmas.toString();
                            break;
                        }
                        case BY_LIST: {
                            if (lemmata.containsKey(record.lemma)) {
                                key = record.lemma;
                            }
                            break;
                        }
                        default: {
                            break;
                        }
                    }
                    KD_extractor.register(collected, key, chain, record);
                }
                chain.reset();
            }
            previous = record;
        }

        // Drop the reference to the partition once it has been consumed.
        this.records = new LinkedList<Resultset_Record>();
        this.dropBlacklisted(collected);
        this.score(collected);
        if (this.verbose) {
            LOGGER.info("x");
        }
        return collected;
    }

    /** Opens the named String-to-Double hash map of the language database. */
    private Map<String, Double> openMap(String name) {
        return this.languageDB.hashMap(name).keySerializer(Serializer.STRING).valueSerializer(Serializer.DOUBLE).open();
    }

    /** Returns the language pack's configuration directory, with a trailing separator. */
    private String configurationDir() {
        return this.languagePath + File.separator + this.lang + File.separator + "configuration_files" + File.separator;
    }

    /**
     * Reads the POS tags whose tokens must be capitalized instead of lower-cased.
     *
     * @return the trimmed lines of {@code capitalization_pos.txt}; empty when capitalization is
     *         disabled, partial or empty when the file cannot be read
     */
    private Set<String> loadCapitalizedPos() {
        Set<String> tags = new HashSet<String>();
        if (!this.capitalize_pos) {
            return tags;
        }
        File file = new File(this.configurationDir() + "capitalization_pos.txt");
        try (BufferedReader in = new BufferedReader(new InputStreamReader(new FileInputStream(file), StandardCharsets.UTF_8))) {
            String line;
            while ((line = in.readLine()) != null) {
                tags.add(line.trim());
            }
        } catch (IOException | SecurityException e) {
            // Best effort: extraction continues with whatever was read so far.
            LOGGER.error("Error reading capitalization_pos.txt file", e);
        }
        return tags;
    }

    /**
     * Reads the POS patterns of the configured tagset, in file order.
     *
     * <p>Each non-comment line is a comma-separated list of POS tags followed by one trailing
     * column. The trailing column is always dropped and is parsed as the pattern weight only when
     * pattern weights are enabled. Lines with no POS tag, or with more tags than the configured
     * maximum keyword length, are skipped. Terminates the JVM when the file cannot be read.
     */
    private List<PosChain> loadPatterns() {
        List<PosChain> chains = new ArrayList<PosChain>();
        File file = new File(this.configurationDir() + "tagset" + File.separator + this.tagset.name() + File.separator + "patterns.txt");
        try (BufferedReader in = new BufferedReader(new InputStreamReader(new FileInputStream(file), StandardCharsets.UTF_8))) {
            String line;
            while ((line = in.readLine()) != null) {
                String trimmed = line.trim();
                if (trimmed.startsWith("#") || trimmed.length() <= 0) {
                    continue;
                }
                String[] columns = line.replace("\"", "").replace(" ", "").trim().toUpperCase().split(",");
                if (columns.length == 0) {
                    // Line made only of separators: nothing to match.
                    continue;
                }
                double weight = 0.0;
                if (this.use_pattern_weight) {
                    weight = Double.parseDouble(columns[columns.length - 1]);
                }
                int tagCount = columns.length - 1;
                if (tagCount < 1 || tagCount > this.max_keyword_lenght) {
                    continue;
                }
                // Tags are stored last-to-first; PosChain consumes them from the end of the array.
                String[] tags = new String[tagCount];
                for (int i = 0; i < tagCount; ++i) {
                    tags[i] = columns[tagCount - 1 - i];
                }
                chains.add(new PosChain(tags, weight));
            }
        } catch (IOException e) {
            if (this.verbose) {
                LOGGER.error("No Pattern File!!", e);
            }
            // NOTE(review): terminating the JVM from a Callable is kept for compatibility with
            // existing callers; consider throwing instead.
            System.exit(1);
        }
        return chains;
    }

    /** Capitalizes or lower-cases the record's token and lemma in place, depending on its POS. */
    private void normalizeCase(Resultset_Record record, Set<String> capitalizedPos) {
        if (this.capitalize_pos && capitalizedPos.contains(record.pos.toUpperCase())) {
            record.token = WordUtils.capitalize(record.token);
            record.lemma = WordUtils.capitalize(record.lemma);
        } else {
            record.token = record.token.toLowerCase();
            record.lemma = record.lemma.toLowerCase();
        }
    }

    /**
     * Returns the record's lemma, or its token when the lemma is the "missing" marker or contains
     * an angle bracket. The marker is compared case-insensitively because the lemma has already
     * been case-normalized when this is called.
     */
    private static String usableLemma(Resultset_Record record) {
        boolean unusable = MISSING_LEMMA.equalsIgnoreCase(record.lemma) || record.lemma.contains("<") || record.lemma.contains(">");
        return unusable ? record.token : record.lemma;
    }

    /** Returns the stem of the token, or the token itself when no stem can be produced. */
    private String stem(String token) {
        String result = "";
        if (this.stemmer != null) {
            try {
                this.stemmer.setCurrent(token);
                this.stemmer.stem();
                result = this.stemmer.getCurrent();
            } catch (Exception e) {
                LOGGER.error("Stemming failed for token " + token, e);
            }
        }
        return result.length() > 0 ? result : token;
    }

    /** Clears the partial match of every chain. */
    private static void resetAll(List<PosChain> chains) {
        for (PosChain chain : chains) {
            chain.reset();
        }
    }

    /**
     * Records one complete match of {@code chain} ending at {@code record}: increments the
     * existing concept stored under {@code key}, or creates and stores a new one.
     */
    private static void register(Map<String, KD_keyconcept> collected, String key, PosChain chain, Resultset_Record record) {
        String tokenKey = chain.tokens.toString();
        KD_keyconcept concept = collected.get(key);
        if (concept != null) {
            concept.incrementFrequency();
            concept.addCurrentPosition(record.token_number);
            concept.addCurrentSentence(record.sentence_id);
            concept.add_variation(tokenKey);
            return;
        }
        concept = new KD_keyconcept();
        concept.frequency = 1;
        concept.addCurrentPosition(record.token_number);
        concept.setPosChain(chain.tags);
        concept.cleanup(key);
        concept.cleanupLemma(chain.lemmas.toString());
        concept.cleanupStem(chain.stems.toString());
        concept.cleanupToken(tokenKey);
        concept.add_variation(tokenKey);
        concept.addCurrentSentence(record.sentence_id);
        // Short single tokens with more than one uppercase letter are flagged as acronyms.
        if (concept.chainlenght == 1 && record.token.length() <= 5 && KD_utils.countUppercase(record.token) > 1) {
            concept.isAcronym = true;
        }
        concept.patternScoreBoost = concept.patternScoreBoost + chain.boost;
        collected.put(key, concept);
    }

    /**
     * Removes every concept whose key is in the "blacklist" map and, when configured to skip
     * keywords with not-allowed words, every concept whose token or lemma array contains a
     * blacklisted entry.
     */
    private void dropBlacklisted(Map<String, KD_keyconcept> collected) {
        Map<String, Double> blacklist = this.openMap("blacklist");
        Set<String> banned = new HashSet<String>();
        for (String entry : blacklist.keySet()) {
            collected.remove(entry);
            if (!this.skip_keyword_with_not_allowed_words) {
                continue;
            }
            String word = entry.replace(SEPARATOR, "").toLowerCase();
            for (Map.Entry<String, KD_keyconcept> candidate : collected.entrySet()) {
                KD_keyconcept concept = candidate.getValue();
                if (concept.getTokenArray().contains(word) || concept.getLemmaArray().contains(word)) {
                    banned.add(candidate.getKey());
                }
            }
        }
        for (String key : banned) {
            collected.remove(key);
        }
    }

    /**
     * Finalizes every collected concept: moves the stored positions of multi-element concepts
     * back by (element count - 1), then sets the IDF weight and the abstract-noun score boost.
     */
    private void score(Map<String, KD_keyconcept> collected) {
        Map<String, Double> idfByTerm = this.openMap("invdocfreq");
        Pattern abstractSuffix = this.lang == KD_core.Language.ITALIAN ? IT_ABSTRACT_SUFFIX : EN_ABSTRACT_SUFFIX;
        for (KD_keyconcept concept : collected.values()) {
            int length = concept.elements.size();
            if (length > 1) {
                for (int i = 0; i < concept.position_in_text.size(); ++i) {
                    concept.position_in_text.set(i, concept.position_in_text.get(i) - length + 1);
                }
            }

            concept.idf = this.no_idf ? 1.0 : this.idfWeight(concept, idfByTerm);

            if (this.no_abstract) {
                concept.scoreBoost = 1.0;
                continue;
            }
            double boost = 0.0;
            for (String element : concept.elements) {
                if (abstractSuffix.matcher(element).matches()) {
                    boost += 1.1;
                    concept.isAbstract = true;
                }
            }
            concept.scoreBoost = concept.scoreBoost + boost;
        }
    }

    /**
     * Computes the IDF weight of a concept from the highest value found in {@code idfByTerm}
     * among its lemma elements and its full string.
     */
    private double idfWeight(KD_keyconcept concept, Map<String, Double> idfByTerm) {
        double best = 0.0;
        for (String element : concept.elementsLemma) {
            Double value = idfByTerm.get(element);
            if (value != null && value > best) {
                best = value;
            }
        }
        Double whole = idfByTerm.get(concept.getString());
        if (whole != null && whole > best) {
            best = whole;
        }
        if (best != 0.0) {
            return 1.0 + best / Math.pow(this.max_idf_value, 3.0);
        }
        if (this.max_idf_value > 1.0) {
            // NOTE(review): the ratio below is a value divided by itself; kept as found because
            // the intended formula cannot be determined from this file.
            return 1.0 + Math.log(this.max_idf_value * 2.0) / Math.log(this.max_idf_value * 2.0);
        }
        return 1.0;
    }

    /** One POS pattern together with the state of its match in progress. */
    private static final class PosChain {
        /** POS tag prefixes of the pattern, stored last-to-first. */
        private final String[] tags;
        /** Weight added to the pattern score boost of every concept created from this pattern. */
        private final double boost;
        private final StringBuilder key = new StringBuilder();
        private final StringBuilder tokens = new StringBuilder();
        private final StringBuilder lemmas = new StringBuilder();
        private final StringBuilder stems = new StringBuilder();
        /** Number of tags still to be matched; always between 1 and {@code tags.length}. */
        private int remaining;

        PosChain(String[] tags, double boost) {
            this.tags = tags;
            this.boost = boost;
            this.remaining = tags.length;
        }

        /** Returns the tag prefix the next token must start with to continue the match. */
        String expectedTag() {
            return this.tags[this.remaining - 1];
        }

        /** Discards the partial match and starts again from the first tag of the pattern. */
        void reset() {
            this.key.setLength(0);
            this.tokens.setLength(0);
            this.lemmas.setLength(0);
            this.stems.setLength(0);
            this.remaining = this.tags.length;
        }
    }
}

