/*
 * Decompiled with CFR 0.152.
 */
package jvnsegmenter;

import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.List;
import java.util.StringTokenizer;
import jvntextpro.data.DataReader;
import jvntextpro.data.Sentence;

/**
 * Reads space-separated text into {@link Sentence} objects, one sentence per input line.
 *
 * <p>Lines starting with {@code #} are skipped. In the default (non-training) mode every
 * token is added to the sentence as a word with a {@code null} tag. In training mode each
 * token is expected to carry a trailing {@code /TAG} label which is split off and passed to
 * {@link Sentence#addTWord} together with the word.
 */
public class WordDataReader
extends DataReader {
    /** When {@code true}, tokens are parsed as {@code word/TAG}; otherwise tokens are plain words. */
    protected boolean isTrainReading = false;
    /** Label suffixes that {@link #readFile(String)} recognises in training mode. */
    protected String[] tags = new String[]{"B-W", "I-W", "O"};

    /** Creates a reader for unlabelled (non-training) data. */
    public WordDataReader() {
        this.isTrainReading = false;
    }

    /**
     * Creates a reader.
     *
     * @param isTrainReading {@code true} to parse {@code word/TAG} tokens, {@code false} to
     *                       treat every token as a plain word
     */
    public WordDataReader(boolean isTrainReading) {
        this.isTrainReading = isTrainReading;
    }

    /**
     * Reads a UTF-8 text file and converts each non-comment line into a {@link Sentence}.
     *
     * <p>In training mode a token is split only when it ends with {@code "/"} followed by one
     * of {@link #tags}; a token without such a suffix is added with an empty word and a
     * {@code null} tag.
     *
     * @param datafile path of the file to read
     * @return the sentences in file order; an empty list if anything goes wrong while reading
     *         (the exception message is printed to standard output)
     */
    @Override
    public List<Sentence> readFile(String datafile) {
        // try-with-resources guarantees the file handle is released even when reading or
        // parsing fails part-way through.
        try (BufferedReader reader = new BufferedReader(new InputStreamReader((InputStream)new FileInputStream(datafile), "UTF-8"))) {
            ArrayList<Sentence> data = new ArrayList<Sentence>();
            String line;
            while ((line = reader.readLine()) != null) {
                if (line.startsWith("#")) {
                    continue;
                }
                Sentence sentence = new Sentence();
                StringTokenizer tk = new StringTokenizer(line, " ");
                while (tk.hasMoreTokens()) {
                    String token = tk.nextToken();
                    String word = "";
                    String tag = null;
                    if (!this.isTrainReading) {
                        word = token;
                    } else {
                        // Match on the suffix so that words which themselves contain '/'
                        // are kept intact.
                        for (int i = 0; i < this.tags.length; ++i) {
                            String labelPart = "/" + this.tags[i];
                            if (!token.endsWith(labelPart)) {
                                continue;
                            }
                            word = token.substring(0, token.length() - labelPart.length());
                            tag = this.tags[i];
                            break;
                        }
                    }
                    sentence.addTWord(word, tag);
                }
                data.add(sentence);
            }
            return data;
        }
        catch (Exception e) {
            // Best-effort contract kept from the original: report and return no data.
            System.out.println(e.getMessage());
            return new ArrayList<Sentence>();
        }
    }

    /**
     * Converts an in-memory string into sentences, one per line.
     *
     * <p>Lines may be terminated by {@code \n} or {@code \r\n}. In training mode each token is
     * split at its <em>last</em> {@code '/'} into word and tag, so words containing slashes
     * are preserved; a token with no tag part is added as a word with a {@code null} tag
     * instead of raising {@link java.util.NoSuchElementException}.
     *
     * @param dataStr the text to parse; {@code null} yields an empty list
     * @return the sentences in input order
     */
    @Override
    public List<Sentence> readString(String dataStr) {
        ArrayList<Sentence> data = new ArrayList<Sentence>();
        if (dataStr == null) {
            return data;
        }
        // "\r?\n" keeps a Windows line ending from leaking a '\r' into the last token/tag.
        for (String line : dataStr.split("\r?\n")) {
            if (line.startsWith("#")) {
                continue;
            }
            Sentence sentence = new Sentence();
            StringTokenizer tk = new StringTokenizer(line, " ");
            while (tk.hasMoreTokens()) {
                String token = tk.nextToken();
                String word = token;
                String tag = null;
                if (this.isTrainReading) {
                    int slash = token.lastIndexOf('/');
                    // Only split when there is a non-empty tag after the slash.
                    if (slash >= 0 && slash < token.length() - 1) {
                        word = token.substring(0, slash);
                        tag = token.substring(slash + 1);
                    }
                }
                sentence.addTWord(word, tag);
            }
            data.add(sentence);
        }
        return data;
    }
}

