/*
 * Decompiled with CFR 0.152.
 */
package de.unihd.dbs.uima.reader.aceternreader;

import de.unihd.dbs.uima.types.heideltime.Dct;
import de.unihd.dbs.uima.types.heideltime.SourceDocInfo;
import java.io.File;
import java.io.IOException;
import java.net.URL;
import java.util.ArrayList;
import java.util.regex.MatchResult;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.uima.cas.CAS;
import org.apache.uima.cas.CASException;
import org.apache.uima.collection.CollectionException;
import org.apache.uima.collection.CollectionReader_ImplBase;
import org.apache.uima.jcas.JCas;
import org.apache.uima.resource.ResourceInitializationException;
import org.apache.uima.util.FileUtils;
import org.apache.uima.util.Level;
import org.apache.uima.util.Logger;
import org.apache.uima.util.Progress;
import org.apache.uima.util.ProgressImpl;

public class ACETernReader
extends CollectionReader_ImplBase {
    private static Logger logger = null;
    private static final String compontent_id = "de.unihd.dbs.uima.reader.aceternreader";
    public Integer numberOfDocuments = 0;
    public static final String PARAM_INPUTDIR = "InputDirectory";
    public static final String PARAM_DCT = "AnnotateCreationTime";
    public Boolean annotateDCT = false;
    private ArrayList<File> mFiles;
    private int currentIndex;

    public void initialize() throws ResourceInitializationException {
        logger = this.getUimaContext().getLogger();
        logger.log(Level.INFO, "initialize() - Initializing ACETern-Reader...");
        this.annotateDCT = (Boolean)this.getConfigParameterValue(PARAM_DCT);
        File directory = new File(((String)this.getConfigParameterValue(PARAM_INPUTDIR)).trim());
        this.currentIndex = 0;
        if (!directory.exists() || !directory.isDirectory()) {
            throw new ResourceInitializationException("directory_not_found", new Object[]{PARAM_INPUTDIR, this.getMetaData().getName(), directory.getPath()});
        }
        this.mFiles = new ArrayList();
        File[] files = directory.listFiles();
        for (int i = 0; i < files.length; ++i) {
            if (files[i].isDirectory()) continue;
            this.mFiles.add(files[i]);
        }
    }

    public boolean hasNext() {
        return this.currentIndex < this.mFiles.size();
    }

    public void getNext(CAS aCAS) throws IOException, CollectionException {
        String xml;
        JCas jcas;
        System.err.print(".");
        try {
            jcas = aCAS.getJCas();
        }
        catch (CASException e) {
            throw new CollectionException((Throwable)e);
        }
        File file = this.mFiles.get(this.currentIndex++);
        logger.log(Level.INFO, "getNext(CAS) - Reading file " + file.getName());
        String text = "";
        text = xml = FileUtils.file2String((File)file);
        text = text.replaceAll("(?s)<QUOTE PREVIOUSPOST=.*?/>", "");
        jcas.setDocumentText(text);
        SourceDocInfo srcDocInfo = new SourceDocInfo(jcas);
        URL url = file.getAbsoluteFile().toURI().toURL();
        srcDocInfo.setUri(url.toString());
        srcDocInfo.addToIndexes();
        if (this.annotateDCT.booleanValue()) {
            this.setDCT(xml, jcas, url.toString());
        }
    }

    public void setDCT(String xml, JCas jcas, String filename) {
        String datetimetag = null;
        String dateformat1 = "(.*?)(\\d\\d\\d\\d)-(\\d\\d)-(\\d\\d)(T| )(\\d\\d):(\\d\\d):(\\d\\d)(.*?)";
        String dateformat2 = "(.*?)(\\d\\d\\d\\d)-(\\d\\d)-(\\d\\d)(T| )(\\d):(\\d\\d):(\\d\\d)(.*?)";
        String dateformat3 = "(.*?)(\\d\\d)/(\\d\\d)/(\\d\\d\\d\\d) (\\d\\d):(\\d\\d):(\\d\\d)\\.(\\d\\d)(.*?)";
        String dateformat4 = "(.*?)(\\d\\d)/(\\d\\d)/(\\d\\d\\d\\d) (\\d):(\\d\\d):(\\d\\d)\\.(\\d\\d)(.*?)";
        String dateformat5 = "(.*?)(\\d\\d\\d\\d)-(\\d\\d)-(\\d\\d)(.*?)";
        String dateformat6 = "(.*?)(\\d\\d)/(\\d\\d)/(\\d\\d\\d\\d)(.*?)";
        String dateformat7 = "(.*?)(January|February|March|April|May|June|July|August|September|October|November|December) ([\\d]?[\\d]),? (\\d\\d\\d\\d)(.*?)";
        String dateformat8 = "(.*?)(\\d\\d\\d\\d)(\\d\\d)(\\d\\d)-(\\d\\d):(\\d\\d):(\\d\\d)(.*?)";
        String dateformat9 = "(.*?)(\\d\\d\\d\\d)(\\d\\d)(\\d\\d)(.*?)";
        for (MatchResult m : ACETernReader.findMatches(Pattern.compile("(<DATETIME>|<DATE_TIME>|<DATE>|<STORY_REF_TIME>)((" + dateformat1 + ")|(" + dateformat2 + ")|(" + dateformat3 + ")|(" + dateformat4 + ")|(" + dateformat5 + ")|(" + dateformat6 + ")|(" + dateformat7 + ")|(" + dateformat8 + ")|(" + dateformat9 + ")(</DATETIME>|</DATE_TIME>|</DATE>|</STORY_REF_TIME>))"), xml)) {
            datetimetag = m.group(2);
        }
        String time_value = null;
        String date_value = null;
        if (datetimetag != null) {
            if (datetimetag.matches(dateformat1)) {
                for (MatchResult m : ACETernReader.findMatches(Pattern.compile(dateformat1), datetimetag)) {
                    date_value = m.group(2) + "-" + m.group(3) + "-" + m.group(4);
                    time_value = m.group(2) + "-" + m.group(3) + "-" + m.group(4) + "T" + m.group(6) + ":" + m.group(7) + ":" + m.group(8);
                }
            } else if (datetimetag.matches(dateformat2)) {
                for (MatchResult m : ACETernReader.findMatches(Pattern.compile(dateformat2), datetimetag)) {
                    date_value = m.group(2) + "-" + m.group(3) + "-" + m.group(4);
                    time_value = m.group(2) + "-" + m.group(3) + "-" + m.group(4) + "T0" + m.group(6) + ":" + m.group(7) + ":" + m.group(8);
                }
            } else if (datetimetag.matches(dateformat3)) {
                for (MatchResult m : ACETernReader.findMatches(Pattern.compile(dateformat3), datetimetag)) {
                    date_value = m.group(4) + "-" + m.group(2) + "-" + m.group(3);
                    time_value = m.group(4) + "-" + m.group(2) + "-" + m.group(3) + "T" + m.group(5) + ":" + m.group(6) + ":" + m.group(7) + "." + m.group(8);
                }
            } else if (datetimetag.matches(dateformat4)) {
                for (MatchResult m : ACETernReader.findMatches(Pattern.compile(dateformat4), datetimetag)) {
                    date_value = m.group(4) + "-" + m.group(2) + "-" + m.group(3);
                    time_value = m.group(4) + "-" + m.group(2) + "-" + m.group(3) + "T0" + m.group(5) + ":" + m.group(6) + ":" + m.group(7) + "." + m.group(8);
                }
            } else if (datetimetag.matches(dateformat5)) {
                for (MatchResult m : ACETernReader.findMatches(Pattern.compile(dateformat5), datetimetag)) {
                    date_value = m.group(2) + "-" + m.group(3) + "-" + m.group(4);
                }
            } else if (datetimetag.matches(dateformat6)) {
                for (MatchResult m : ACETernReader.findMatches(Pattern.compile(dateformat6), datetimetag)) {
                    date_value = m.group(4) + "-" + m.group(2) + "-" + m.group(3);
                }
            } else if (datetimetag.matches(dateformat7)) {
                for (MatchResult m : ACETernReader.findMatches(Pattern.compile(dateformat7), datetimetag)) {
                    String year = m.group(4);
                    String month = this.normMonth(m.group(2));
                    String day = this.normDay(m.group(3));
                    date_value = year + "-" + month + "-" + day;
                }
            } else if (datetimetag.matches(dateformat8)) {
                for (MatchResult m : ACETernReader.findMatches(Pattern.compile(dateformat8), datetimetag)) {
                    date_value = m.group(2) + "-" + m.group(3) + "-" + m.group(4);
                    time_value = m.group(2) + "-" + m.group(3) + "-" + m.group(4) + "T" + m.group(5) + ":" + m.group(6) + ":" + m.group(7);
                }
            } else if (datetimetag.matches(dateformat9)) {
                for (MatchResult m : ACETernReader.findMatches(Pattern.compile(dateformat9), datetimetag)) {
                    date_value = m.group(2) + "-" + m.group(3) + "-" + m.group(4);
                }
            } else {
                System.err.println();
                System.err.println("[de.unihd.dbs.uima.reader.aceternreader] cannot set dct with datetimetag: " + datetimetag);
            }
            if (date_value != null) {
                Dct dct = new Dct(jcas);
                dct.setBegin(0);
                dct.setEnd(1);
                dct.setFilename(filename);
                dct.setTimexId("dct");
                if (time_value != null) {
                    dct.setValue(time_value);
                } else if (date_value != null) {
                    dct.setValue(date_value);
                } else {
                    System.err.println();
                    System.err.println("[de.unihd.dbs.uima.reader.aceternreader] something wrong with setting DCT of : " + datetimetag);
                }
                dct.addToIndexes();
            }
        } else {
            String exactDay;
            String exactMonth;
            String refDay;
            String refMonth;
            Object refYear;
            if (date_value == null) {
                refYear = "";
                refMonth = "";
                refDay = "";
                for (MatchResult m1 : ACETernReader.findMatches(Pattern.compile("DATE:[\\s]+(" + dateformat7 + ")"), xml)) {
                    String referenceDate = m1.group(1);
                    if (!referenceDate.matches(dateformat7)) continue;
                    for (MatchResult mr : ACETernReader.findMatches(Pattern.compile(dateformat7), referenceDate)) {
                        refYear = mr.group(4);
                        refMonth = this.normMonth(mr.group(2));
                        refDay = this.normDay(mr.group(3));
                    }
                }
                for (MatchResult m : ACETernReader.findMatches(Pattern.compile("<STORY_REF_TIME>(Jan\\.|Feb\\.|Mar\\.|Apr\\.|May\\.|Jun\\.|Jul\\.|Aug\\.|Sep\\.|Oct\\.|Nov\\.|Dec\\.|JAN\\.|FEB\\.|MAR\\.|APR\\.|MAY\\.|JUN\\.|JUL\\.|AUG\\.|SEP\\.|OCT\\.|NOV\\.|DEC\\.)[\\s]+([\\d]?[\\d])</STORY_REF_TIME>"), xml)) {
                    exactMonth = m.group(1);
                    exactDay = m.group(2);
                    date_value = refYear + "-" + this.normMonth(exactMonth) + "-" + this.normDay(exactDay);
                }
            }
            if (date_value == null) {
                for (MatchResult m : ACETernReader.findMatches(Pattern.compile("<STORY_REF_TIME>.*?(\\d\\d\\d\\d)(\\d\\d)(\\d\\d).*?</STORY_REF_TIME>"), xml)) {
                    String exactYear = m.group(1);
                    String exactMonth2 = m.group(2);
                    String exactDay2 = m.group(3);
                    date_value = exactYear + "-" + exactMonth2 + "-" + exactDay2;
                }
            }
            if (date_value == null) {
                refYear = "";
                refMonth = "";
                refDay = "";
                for (MatchResult m : ACETernReader.findMatches(Pattern.compile("<DOCNO>.*?(\\d\\d\\d\\d)(\\d\\d)(\\d\\d).*?</DOCNO>"), xml)) {
                    refYear = m.group(1);
                    refMonth = this.normMonth(m.group(2));
                    refDay = this.normDay(m.group(3));
                }
                if (!((String)refYear).matches("")) {
                    for (MatchResult m : ACETernReader.findMatches(Pattern.compile("<STORY_REF_TIME>.*?(January|February|March|April|May|June|July|August|September|October|November|December) ([\\d]?[\\d]).*?</STORY_REF_TIME>"), xml)) {
                        exactMonth = this.normMonth(m.group(1));
                        exactDay = this.normDay(m.group(2));
                        date_value = refYear + "-" + exactMonth + "-" + exactDay;
                    }
                }
            }
            if (date_value == null) {
                refYear = "";
                refMonth = "";
                refDay = "";
                for (MatchResult m : ACETernReader.findMatches(Pattern.compile("Publish Date:[\\s]+(\\d\\d)/(\\d\\d)/(\\d\\d)"), xml)) {
                    refYear = "19" + m.group(3);
                    refMonth = this.normMonth(m.group(1));
                    refDay = this.normDay(m.group(2));
                }
                if (!((String)refYear).matches("")) {
                    for (MatchResult m : ACETernReader.findMatches(Pattern.compile("<STORY_REF_TIME>.*?(Jan\\.|Feb\\.|Mar\\.|Apr\\.|May\\.|Jun\\.|Jul\\.|Aug\\.|Sep\\.|Oct\\.|Nov\\.|Dec\\.|JAN\\.|FEB\\.|MAR\\.|APR\\.|MAY\\.|JUN\\.|JUL\\.|AUG\\.|SEP\\.|OCT\\.|NOV\\.|DEC\\.)[\\s]+([\\d]?[\\d]).*?</STORY_REF_TIME>"), xml)) {
                        exactMonth = this.normMonth(m.group(1));
                        exactDay = this.normDay(m.group(2));
                        date_value = (String)refYear + "-" + exactMonth + "-" + exactDay;
                    }
                }
            }
            if (date_value == null) {
                try {
                    for (MatchResult m : ACETernReader.findMatches(Pattern.compile("(<DOC ID=\".*?\" DATE=\")(" + dateformat9 + ")(\">)"), xml)) {
                        datetimetag = m.group(2);
                    }
                    if (datetimetag.matches(dateformat9)) {
                        for (MatchResult m : ACETernReader.findMatches(Pattern.compile(dateformat9), datetimetag)) {
                            date_value = m.group(2) + "-" + m.group(3) + "-" + m.group(4);
                        }
                    } else {
                        System.err.println();
                        System.err.println("[de.unihd.dbs.uima.reader.aceternreader] cannot set dct with datetimetag: " + datetimetag);
                    }
                }
                catch (NullPointerException refYear2) {
                    // empty catch block
                }
            }
            if (date_value == null) {
                System.err.println();
                System.err.println("[de.unihd.dbs.uima.reader.aceternreader] Cannot set Document Creation Time - no datetimetag found in " + filename + "!");
            } else {
                Dct dct = new Dct(jcas);
                dct.setBegin(0);
                dct.setEnd(1);
                dct.setFilename(filename);
                dct.setTimexId("dct");
                dct.setValue(date_value);
                dct.addToIndexes();
            }
        }
    }

    public String normDay(String day) {
        if (!day.matches("\\d\\d")) {
            if (day.equals("1")) {
                day = "01";
            } else if (day.equals("2")) {
                day = "02";
            } else if (day.equals("3")) {
                day = "03";
            } else if (day.equals("4")) {
                day = "04";
            } else if (day.equals("5")) {
                day = "05";
            } else if (day.equals("6")) {
                day = "06";
            } else if (day.equals("7")) {
                day = "07";
            } else if (day.equals("8")) {
                day = "08";
            } else if (day.equals("9")) {
                day = "09";
            }
        }
        return day;
    }

    public String normMonth(String month) {
        if (month.toLowerCase().startsWith("jan")) {
            month = "01";
        } else if (month.toLowerCase().startsWith("feb")) {
            month = "02";
        } else if (month.toLowerCase().startsWith("mar")) {
            month = "03";
        } else if (month.toLowerCase().startsWith("apr")) {
            month = "04";
        } else if (month.toLowerCase().startsWith("may")) {
            month = "05";
        } else if (month.toLowerCase().startsWith("jun")) {
            month = "06";
        } else if (month.toLowerCase().startsWith("jul")) {
            month = "07";
        } else if (month.toLowerCase().startsWith("aug")) {
            month = "08";
        } else if (month.toLowerCase().startsWith("sep")) {
            month = "09";
        } else if (month.toLowerCase().startsWith("oct")) {
            month = "10";
        } else if (month.toLowerCase().startsWith("nov")) {
            month = "11";
        } else if (month.toLowerCase().startsWith("dec")) {
            month = "12";
        }
        return month;
    }

    public void close() throws IOException {
    }

    public Progress[] getProgress() {
        return new Progress[]{new ProgressImpl(this.currentIndex, this.mFiles.size(), "entities")};
    }

    public int getNumberOfDocuments() {
        return this.mFiles.size();
    }

    public static Iterable<MatchResult> findMatches(Pattern pattern, CharSequence s) {
        ArrayList<MatchResult> results = new ArrayList<MatchResult>();
        Matcher m = pattern.matcher(s);
        while (m.find()) {
            results.add(m.toMatchResult());
        }
        return results;
    }
}

