/*
 * Decompiled with CFR 0.152.
 */
package jvntextpro;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream;
import java.io.InputStreamReader;
import jvnpostag.MaxentTagger;
import jvnsegmenter.CRFSegmenter;
import jvnsensegmenter.JVnSenSegmenter;
import jvntextpro.conversion.CompositeUnicode2Unicode;
import jvntextpro.util.VnSyllParser;
import jvntokenizer.PennTokenizer;

/**
 * Facade that chains the individual text-processing components into one
 * pipeline: Unicode conversion, sentence segmentation, tokenization, word
 * segmentation, post-processing and POS tagging.
 *
 * <p>Every stage except the conversion and the post-processing is optional:
 * a stage whose component has not been initialised (or whose initialisation
 * failed) passes its input through, although {@link #senSegment(String)} and
 * {@link #senTokenize(String)} still trim the text.
 */
public class JVnTextPro {
    /** Sentence segmenter; {@code null} until {@link #initSenSegmenter(String)} succeeds. */
    JVnSenSegmenter vnSenSegmenter = null;
    /** Word segmenter; {@code null} until {@link #initSegmenter(String)} succeeds. */
    CRFSegmenter vnSegmenter = null;
    /** POS tagger; {@code null} until {@link #initPosTagger(String)} succeeds. */
    MaxentTagger vnPosTagger = null;
    /** Whether {@link #senTokenize(String)} actually tokenizes; off by default. */
    boolean isTokenization = false;
    /** Converter applied to the raw text as the first step of {@link #process(String)}. */
    public CompositeUnicode2Unicode convertor = new CompositeUnicode2Unicode();

    /**
     * Creates the sentence segmenter and initialises it from {@code modelDir}.
     *
     * @param modelDir directory handed to {@code JVnSenSegmenter.init}
     * @return {@code true} on success; {@code false} if initialisation reported
     *         failure, in which case the segmenter is reset to {@code null}
     */
    public boolean initSenSegmenter(String modelDir) {
        System.out.println("Initilize JVnSenSegmenter ...");
        this.vnSenSegmenter = new JVnSenSegmenter();
        if (!this.vnSenSegmenter.init(modelDir)) {
            System.out.println("Error while initilizing JVnSenSegmenter");
            this.vnSenSegmenter = null;
            return false;
        }
        return true;
    }

    /**
     * Creates the word segmenter and initialises it from {@code modelDir}.
     *
     * @param modelDir directory handed to {@code CRFSegmenter.init}
     * @return {@code true} on success; {@code false} if initialisation threw,
     *         in which case the segmenter is reset to {@code null}
     */
    public boolean initSegmenter(String modelDir) {
        System.out.println("Initilize JVnSegmenter ...");
        System.out.println(modelDir);
        this.vnSegmenter = new CRFSegmenter();
        try {
            this.vnSegmenter.init(modelDir);
        }
        catch (Exception e) {
            System.out.println("Error while initializing JVnSegmenter");
            // Surface the cause instead of silently dropping it.
            e.printStackTrace();
            this.vnSegmenter = null;
            return false;
        }
        return true;
    }

    /**
     * Creates the POS tagger from {@code modelDir}.
     *
     * @param modelDir directory handed to the {@code MaxentTagger} constructor
     * @return {@code true} on success; {@code false} if construction threw,
     *         in which case the tagger is reset to {@code null}
     */
    public boolean initPosTagger(String modelDir) {
        try {
            this.vnPosTagger = new MaxentTagger(modelDir);
        }
        catch (Exception e) {
            System.out.println("Error while initializing POS TAgger");
            // Surface the cause instead of silently dropping it.
            e.printStackTrace();
            this.vnPosTagger = null;
            return false;
        }
        return true;
    }

    /** Enables the tokenization stage used by {@link #senTokenize(String)}. */
    public void initSenTokenization() {
        this.isTokenization = true;
    }

    /**
     * Runs the full pipeline on {@code text}: conversion, sentence
     * segmentation, tokenization, word segmentation, post-processing and
     * POS tagging, in that order.
     *
     * @param text raw input text
     * @return the text after all enabled stages have been applied
     */
    public String process(String text) {
        String ret = this.convertor.convert(text);
        ret = this.senSegment(ret);
        ret = this.senTokenize(ret);
        ret = this.wordSegment(ret);
        ret = this.postProcessing(ret);
        ret = this.posTagging(ret);
        return ret;
    }

    /**
     * Reads {@code infile} as UTF-8, joining its lines with {@code "\n"}
     * (each line, including the last, is followed by one), and runs
     * {@link #process(String)} on the result.
     *
     * @param infile file to read
     * @return the processed text, or the empty string if reading or
     *         processing threw (the exception is printed, not propagated)
     */
    public String process(File infile) {
        try {
            String data;
            BufferedReader reader = new BufferedReader(new InputStreamReader((InputStream)new FileInputStream(infile), "UTF-8"));
            try {
                StringBuilder buffer = new StringBuilder();
                String line;
                while ((line = reader.readLine()) != null) {
                    buffer.append(line).append("\n");
                }
                data = buffer.toString();
            }
            finally {
                // Close even when readLine() fails so the file handle is not leaked.
                reader.close();
            }
            return this.process(data);
        }
        catch (Exception e) {
            System.out.println(e.getMessage());
            e.printStackTrace();
            return "";
        }
    }

    /**
     * Applies the sentence segmenter if one is initialised.
     *
     * @param text input text
     * @return the segmenter's output, or {@code text} itself when no segmenter
     *         is available; trimmed in both cases
     */
    public String senSegment(String text) {
        String ret = text;
        if (this.vnSenSegmenter != null) {
            ret = this.vnSenSegmenter.senSegment(text);
        }
        return ret.trim();
    }

    /**
     * Applies {@code PennTokenizer.tokenize} if tokenization has been enabled.
     *
     * @param text input text
     * @return the tokenized text, or {@code text} itself when tokenization is
     *         disabled; trimmed in both cases
     */
    public String senTokenize(String text) {
        String ret = text;
        if (this.isTokenization) {
            ret = PennTokenizer.tokenize(text);
        }
        return ret.trim();
    }

    /**
     * Applies the word segmenter if one is initialised.
     *
     * @param text input text
     * @return the segmenter's output, or {@code text} unchanged when no
     *         segmenter is available
     */
    public String wordSegment(String text) {
        if (this.vnSegmenter == null) {
            return text;
        }
        return this.vnSegmenter.segmenting(text);
    }

    /**
     * Applies the POS tagger if one is initialised.
     *
     * @param text input text
     * @return the tagger's output, or {@code text} unchanged when no tagger is
     *         available
     */
    public String posTagging(String text) {
        String ret = text;
        if (this.vnPosTagger != null) {
            ret = this.vnPosTagger.tagging(text);
        }
        return ret;
    }

    /**
     * Splits apart every underscore-joined word that contains at least one
     * syllable rejected by {@code VnSyllParser.isValidVnSyllable()}: its
     * underscores are replaced by spaces. Words whose syllables are all
     * accepted are kept as they are.
     *
     * <p>The text is handled line by line (split on {@code "\n"}) and word by
     * word (split on a single space or tab); words are re-joined with single
     * spaces, each line is trimmed, and the overall result is trimmed.
     *
     * @param text word-segmented text
     * @return the text with invalid compound words broken back into syllables
     */
    public String postProcessing(String text) {
        StringBuilder ret = new StringBuilder();
        for (String line : text.split("\n")) {
            StringBuilder templine = new StringBuilder();
            for (String currentWord : line.split("[ \t]")) {
                String[] syllables = currentWord.split("_");
                if (this.containsInvalidSyllable(syllables)) {
                    StringBuilder temp = new StringBuilder();
                    for (String syll : syllables) {
                        temp.append(syll).append(" ");
                    }
                    templine.append(temp.toString().trim()).append(" ");
                } else {
                    templine.append(currentWord).append(" ");
                }
            }
            ret.append(templine.toString().trim()).append("\n");
        }
        return ret.toString().trim();
    }

    /**
     * Reports whether any of {@code syllables}, once lower-cased, is rejected
     * by {@code VnSyllParser.isValidVnSyllable()}.
     */
    private boolean containsInvalidSyllable(String[] syllables) {
        for (String syllable : syllables) {
            // Locale.ROOT keeps the case mapping independent of the JVM's default
            // locale (e.g. a Turkish default would map 'I' to a dotless 'ı').
            VnSyllParser parser = new VnSyllParser(syllable.toLowerCase(java.util.Locale.ROOT));
            if (!parser.isValidVnSyllable()) {
                return true;
            }
        }
        return false;
    }
}

