/*
 * Decompiled with CFR 0.152.
 */
package jvnpostag;

import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.List;
import java.util.StringTokenizer;
import jvntextpro.data.DataReader;
import jvntextpro.data.Sentence;
import jvntextpro.util.StringUtils;

/**
 * Reads sentences for part-of-speech tagging, either from a UTF-8 file
 * ({@link #readFile(String)}) or from an in-memory string
 * ({@link #readString(String)}).
 *
 * <p>Each input line is one sentence whose tokens are separated by single
 * spaces. When {@link #isTrainReading} is {@code true} a token is expected to
 * have the form {@code word/tag}; otherwise every token is taken as a bare
 * word and stored with a {@code null} tag.
 */
public class POSDataReader
extends DataReader {
    /**
     * Tag inventory accepted by {@link #readFile(String)} in training mode.
     * A tag read from the file is matched against these entries ignoring case
     * and replaced by the spelling used here.
     */
    protected String[] tags = new String[]{"N", "Np", "Nc", "Nu", "V", "A", "P", "L", "M", "R", "E", "C", "I", "T", "B", "Y", "X", "Ny", "Nb", "Vb", "Mrk"};

    /** {@code true} when tokens carry a {@code /tag} suffix that must be parsed. */
    protected boolean isTrainReading = false;

    /** Creates a reader for untagged input (every token is a bare word). */
    public POSDataReader() {
    }

    /**
     * Creates a reader.
     *
     * @param isTrainReading {@code true} to parse tokens as {@code word/tag}
     *                       pairs, {@code false} to treat them as bare words
     */
    public POSDataReader(boolean isTrainReading) {
        this.isTrainReading = isTrainReading;
    }

    /**
     * Reads all sentences from a UTF-8 encoded file, one sentence per line.
     * Lines starting with {@code #} are skipped. In training mode a line that
     * contains a token without a tag, or a tag that is not in {@link #tags},
     * is dropped entirely.
     *
     * @param datafile path of the file to read
     * @return the sentences that were read, or {@code null} if any exception
     *         occurred while opening, reading or closing the file
     */
    @Override
    public List<Sentence> readFile(String datafile) {
        // try-with-resources guarantees the file handle is released even when
        // reading or parsing fails part-way through.
        try (BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(datafile), "UTF-8"))) {
            List<Sentence> data = new ArrayList<Sentence>();
            String line;
            while ((line = reader.readLine()) != null) {
                if (line.startsWith("#")) {
                    continue;
                }
                Sentence sentence = this.isTrainReading ? this.parseTaggedLine(line) : this.parsePlainLine(line);
                if (sentence != null) {
                    data.add(sentence);
                }
            }
            return data;
        }
        catch (Exception e) {
            System.out.println("Error while reading data!");
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Reads sentences from a string, one sentence per {@code \n}-separated
     * line. Unlike {@link #readFile(String)} this method performs no tag
     * validation and never drops a line: in training mode the text before the
     * first {@code /} becomes the word, and the tag is kept only when the
     * token splits into exactly two fields (otherwise the tag is
     * {@code null}). Tokens that consist solely of slashes are skipped.
     *
     * @param dataStr the text to parse
     * @return one sentence per input line
     */
    @Override
    public List<Sentence> readString(String dataStr) {
        List<Sentence> data = new ArrayList<Sentence>();
        for (String line : dataStr.split("\n")) {
            Sentence sentence = new Sentence();
            StringTokenizer tk = new StringTokenizer(line, " ");
            while (tk.hasMoreTokens()) {
                String token = tk.nextToken();
                if (!this.isTrainReading) {
                    sentence.addTWord(token, null);
                    continue;
                }
                String[] fields = token.split("/");
                // split() drops trailing empty strings, so an all-slash token
                // yields an empty array and carries no word at all.
                if (fields.length <= 0) {
                    continue;
                }
                String tag = fields.length == 2 ? fields[1] : null;
                sentence.addTWord(fields[0], tag);
            }
            data.add(sentence);
        }
        return data;
    }

    /** Builds a sentence whose tokens are bare words with a {@code null} tag. */
    private Sentence parsePlainLine(String line) {
        Sentence sentence = new Sentence();
        StringTokenizer tk = new StringTokenizer(line, " ");
        while (tk.hasMoreTokens()) {
            sentence.addTWord(tk.nextToken(), null);
        }
        return sentence;
    }

    /**
     * Builds a sentence from a line of {@code word/tag} tokens.
     *
     * @return the parsed sentence, or {@code null} when the line is malformed
     *         (a token without a tag, or a tag outside {@link #tags})
     */
    private Sentence parseTaggedLine(String line) {
        Sentence sentence = new Sentence();
        boolean error = false;
        StringTokenizer tk = new StringTokenizer(line, " ");
        while (tk.hasMoreTokens()) {
            String token = tk.nextToken();

            // A lone slash (optionally written as "///", i.e. slash tagged
            // with slash) is the punctuation mark "/". Compare by value:
            // tokenizer output is never the same object as a literal.
            if ("/".equals(token) || "///".equals(token)) {
                sentence.addTWord("/", "Mrk");
                continue;
            }

            String[] fields = token.split("/");
            if (fields.length < 2) {
                // No tag could be extracted: the whole line is unusable.
                return null;
            }

            // The last field is the tag; everything before it is the word,
            // which may itself contain slashes.
            String tag = fields[fields.length - 1];
            String word = this.joinWordFields(fields);

            if (StringUtils.isPunc(tag)) {
                sentence.addTWord(word, "Mrk");
                continue;
            }

            String canonical = this.canonicalTag(tag);
            if (canonical == null) {
                // Keep scanning so every unknown tag on the line is reported,
                // but remember that the line must be dropped.
                error = true;
                System.out.println("error");
                System.out.println(tag);
            } else {
                tag = canonical;
            }
            sentence.addTWord(word, tag);
        }
        return error ? null : sentence;
    }

    /** Re-joins all fields except the last one with {@code /}. */
    private String joinWordFields(String[] fields) {
        StringBuilder word = new StringBuilder(fields[0]);
        for (int i = 1; i < fields.length - 1; ++i) {
            word.append('/').append(fields[i]);
        }
        return word.toString();
    }

    /**
     * Looks up a tag in {@link #tags} ignoring case.
     *
     * @return the spelling stored in {@link #tags}, or {@code null} if the
     *         tag is unknown
     */
    private String canonicalTag(String tag) {
        for (int i = 0; i < this.tags.length; ++i) {
            if (tag.equalsIgnoreCase(this.tags[i])) {
                return this.tags[i];
            }
        }
        return null;
    }
}

