/*
 * Decompiled with CFR 0.152.
 */
package de.unihd.dbs.uima.reader.eventi2014reader;

import de.unihd.dbs.uima.annotator.heideltime.utilities.Logger;
import de.unihd.dbs.uima.annotator.heideltime.utilities.Toolbox;
import de.unihd.dbs.uima.types.heideltime.Dct;
import de.unihd.dbs.uima.types.heideltime.Sentence;
import de.unihd.dbs.uima.types.heideltime.Token;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.Queue;
import java.util.regex.MatchResult;
import java.util.regex.Pattern;
import org.apache.uima.cas.CAS;
import org.apache.uima.cas.CASException;
import org.apache.uima.collection.CollectionException;
import org.apache.uima.collection.CollectionReader_ImplBase;
import org.apache.uima.jcas.JCas;
import org.apache.uima.resource.ResourceInitializationException;
import org.apache.uima.util.FileUtils;
import org.apache.uima.util.Progress;
import org.apache.uima.util.ProgressImpl;

/**
 * UIMA collection reader that turns each readable file of a configured input
 * directory into one CAS.
 *
 * <p>Each file is scanned line by line. Lines starting with
 * {@code <Document doc_name=}, {@code <token} and {@code <TIMEX3} are
 * evaluated with regular expressions (the file is <em>not</em> parsed with an
 * XML parser); every other line is ignored. The document text is rebuilt by
 * joining the token strings with single spaces, except that no space is put
 * in front of closing punctuation or behind an opening parenthesis. For every
 * token a {@link Token} annotation is indexed, for every run of tokens
 * starting with token number {@code 0} a {@link Sentence} annotation, and, if
 * a matching {@code TIMEX3} tag was found, one {@link Dct} annotation that
 * spans the whole document text.
 */
public class Eventi2014Reader
extends CollectionReader_ImplBase {
    /** Name of the configuration parameter holding the input directory path. */
    private static final String PARAM_INPUTDIR = "InputDirectory";

    /** Captures the document name (group 1). */
    private static final Pattern DOCUMENT_PATTERN = Pattern.compile("<Document doc_name=\"(.*?)\">");

    /** Captures token id (1), sentence number (2), token number (3) and token string (4). */
    private static final Pattern TOKEN_PATTERN = Pattern.compile("<token t_id=\"(.*?)\" sentence=\"(.*?)\" number=\"(.*?)\">(.*?)</token>");

    /** Captures the full TIMEX3 tag (1) and its {@code value} attribute (2) for DCT/DPT tags. */
    private static final Pattern DCT_PATTERN = Pattern.compile("(<TIMEX3 .*? TAG_DESCRIPTOR=\"D[CP]T\" .*? value=\"(.*?)\".*?/>)");

    /** Class handed to the logger as the message origin. */
    private final Class<?> component = this.getClass();

    /** Number of files queued by the last call to {@link #initialize()}. */
    private int numberOfDocuments = 0;

    /** Tokens that are appended to the text without a preceding space. */
    private final HashSet<String> hsNoSpaceBefore =
            new HashSet<String>(Arrays.asList(".", ",", ":", ";", "?", "!", ")"));

    /** Tokens after which the next token is appended without a space. */
    private final HashSet<String> hsNoSpaceBehind =
            new HashSet<String>(Arrays.asList("("));

    /** Files that still have to be delivered by {@link #getNext(CAS)}. */
    private final Queue<File> files = new LinkedList<File>();

    /**
     * Reads the {@code InputDirectory} configuration parameter and queues all
     * readable regular files found directly inside that directory.
     *
     * @throws ResourceInitializationException if the parameter is missing or
     *         not a string, or if the directory cannot be listed
     */
    public void initialize() throws ResourceInitializationException {
        Object configured = this.getConfigParameterValue(PARAM_INPUTDIR);
        if (!(configured instanceof String)) {
            // Previously surfaced as a bare NullPointerException/ClassCastException.
            throw new ResourceInitializationException(new IllegalArgumentException(
                    "Configuration parameter \"" + PARAM_INPUTDIR + "\" is missing or not a string"));
        }
        this.populateFileList(((String)configured).trim());
    }

    /**
     * Fills the given CAS with the contents of the next queued file.
     *
     * @param aCAS the CAS to populate
     * @throws IOException if the file cannot be read
     * @throws CollectionException if no JCas can be obtained from
     *         {@code aCAS} or no file is left in the queue
     */
    public void getNext(CAS aCAS) throws IOException, CollectionException {
        JCas jcas;
        try {
            jcas = aCAS.getJCas();
        }
        catch (CASException e) {
            throw new CollectionException((Throwable)e);
        }
        this.fillJCas(jcas);
        // One dot per processed document as a minimal progress indicator.
        System.err.print(".");
    }

    /**
     * Takes the next file from the queue, reconstructs its document text from
     * the token lines and indexes token, sentence and (if present) DCT
     * annotations.
     *
     * @param jcas the JCas receiving the document text and the annotations
     * @throws IOException if the file cannot be read
     * @throws CollectionException if the file queue is empty
     * @throws NumberFormatException if a token line carries a non-numeric
     *         {@code t_id}, {@code sentence} or {@code number} attribute
     */
    private void fillJCas(JCas jcas) throws IOException, CollectionException {
        File f = this.files.poll();
        if (f == null) {
            throw new CollectionException((Throwable)new IllegalStateException(
                    "getNext() was called although no document is left"));
        }
        String xml = FileUtils.file2String((File)f);
        StringBuilder text = new StringBuilder();
        String fullDctTag = "";
        String dct = "";
        String filename = "";
        String lastTok = "";
        int sentBegin = 0;
        // -1 means "no token seen yet", i.e. there is no sentence to close.
        int sentEnd = -1;
        for (String line : xml.split("\n")) {
            if (line.startsWith("<Document doc_name=")) {
                for (MatchResult mr : Toolbox.findMatches(DOCUMENT_PATTERN, line)) {
                    filename = mr.group(1);
                }
            } else if (line.startsWith("<token")) {
                for (MatchResult mr : Toolbox.findMatches(TOKEN_PATTERN, line)) {
                    String token = mr.group(4);
                    int tokID = Integer.parseInt(mr.group(1));
                    int sentNum = Integer.parseInt(mr.group(2));
                    int tokNum = Integer.parseInt(mr.group(3));
                    // A separating space is used unless this is the very first
                    // text, the token must attach to the left (e.g. "."), or
                    // the previous token must attach to the right (e.g. "(").
                    boolean needsSpace = text.length() > 0
                            && !this.hsNoSpaceBefore.contains(token)
                            && !this.hsNoSpaceBehind.contains(lastTok);
                    if (needsSpace) {
                        text.append(' ');
                    }
                    int tokBegin = text.length();
                    text.append(token);
                    int tokEnd = text.length();
                    lastTok = token;
                    if (tokNum == 0) {
                        // Token number 0 opens a new sentence: close the previous one first.
                        if (sentEnd >= 0) {
                            this.addSentenceAnnotation(jcas, sentBegin, sentEnd, filename);
                        }
                        sentBegin = tokBegin;
                    }
                    this.addTokenAnnotation(jcas, tokBegin, tokEnd, tokID, filename, sentNum, tokNum);
                    sentEnd = tokEnd;
                }
            } else if (line.startsWith("<TIMEX3")) {
                for (MatchResult mr : Toolbox.findMatches(DCT_PATTERN, line)) {
                    fullDctTag = mr.group(1);
                    dct = mr.group(2);
                    System.err.println("DCT: " + dct);
                }
            }
        }
        // Close the last open sentence; skip this for files without any token,
        // which would otherwise yield an annotation with end (-1) < begin (0).
        if (sentEnd >= 0) {
            this.addSentenceAnnotation(jcas, sentBegin, sentEnd, filename);
        }
        String documentText = text.toString();
        jcas.setDocumentText(documentText);
        if (!dct.equals("")) {
            Dct dctAnnotation = new Dct(jcas);
            dctAnnotation.setBegin(0);
            dctAnnotation.setEnd(documentText.length());
            dctAnnotation.setFilename(filename + "---" + fullDctTag);
            dctAnnotation.setValue(dct);
            dctAnnotation.addToIndexes();
        }
    }

    /**
     * Creates a {@link Sentence} annotation and adds it to the indexes.
     *
     * @param jcas the JCas to add the annotation to
     * @param begin begin offset of the sentence in the document text
     * @param end end offset of the sentence in the document text
     * @param filename value stored in the annotation's filename feature
     */
    public void addSentenceAnnotation(JCas jcas, int begin, int end, String filename) {
        Sentence sentAnnotation = new Sentence(jcas);
        sentAnnotation.setBegin(begin);
        sentAnnotation.setEnd(end);
        sentAnnotation.setFilename(filename);
        sentAnnotation.addToIndexes();
    }

    /**
     * Creates a {@link Token} annotation and adds it to the indexes. The
     * filename feature is set to
     * {@code filename + "---" + sentNum + "---" + tokNum}.
     *
     * @param jcas the JCas to add the annotation to
     * @param begin begin offset of the token in the document text
     * @param end end offset of the token in the document text
     * @param tokID token id stored in the annotation
     * @param filename document name used as prefix of the filename feature
     * @param sentNum sentence number encoded into the filename feature
     * @param tokNum token number encoded into the filename feature
     */
    public void addTokenAnnotation(JCas jcas, int begin, int end, int tokID, String filename, int sentNum, int tokNum) {
        Token tokenAnnotation = new Token(jcas);
        tokenAnnotation.setBegin(begin);
        tokenAnnotation.setEnd(end);
        tokenAnnotation.setTokenId(tokID);
        tokenAnnotation.setFilename(filename + "---" + sentNum + "---" + tokNum);
        tokenAnnotation.addToIndexes();
    }

    /**
     * @return {@code true} while at least one file is left in the queue
     */
    public boolean hasNext() throws IOException, CollectionException {
        return !this.files.isEmpty();
    }

    /**
     * @return a single progress entry: files already taken from the queue
     *         versus files queued at initialization
     */
    public Progress[] getProgress() {
        int delivered = this.numberOfDocuments - this.files.size();
        return new Progress[]{new ProgressImpl(delivered, this.numberOfDocuments, "entities")};
    }

    /**
     * Drops all files that have not been delivered yet.
     */
    public void close() throws IOException {
        this.files.clear();
    }

    /**
     * Replaces the file queue with all readable regular files located
     * directly inside {@code dirPath}, in sorted path order. Entries that are
     * not readable regular files are logged and skipped.
     *
     * @param dirPath path of the input directory
     * @throws ResourceInitializationException if {@code dirPath} is not an
     *         existing directory or its contents cannot be listed
     */
    private void populateFileList(String dirPath) throws ResourceInitializationException {
        File dir = new File(dirPath);
        if (!dir.isDirectory()) {
            throw new ResourceInitializationException(new IllegalArgumentException(
                    "Input directory \"" + dir.getAbsolutePath() + "\" does not exist or is not a directory"));
        }
        File[] entries = dir.listFiles();
        if (entries == null) {
            // listFiles() signals an I/O error (e.g. missing permission) with null.
            throw new ResourceInitializationException(new IOException(
                    "Could not list the contents of \"" + dir.getAbsolutePath() + "\""));
        }
        // listFiles() guarantees no particular order; sort for reproducible runs.
        Arrays.sort(entries);
        // Start from an empty queue so that re-initialization does not duplicate files.
        this.files.clear();
        for (File f : entries) {
            if (!(f.isFile() && f.canRead())) {
                Logger.printDetail(this.component, "File \"" + f.getAbsolutePath() + "\" was ignored because it either didn't exist, wasn't a file or wasn't readable.");
                continue;
            }
            this.files.add(f);
        }
        this.numberOfDocuments = this.files.size();
    }
}

