/*
 * Decompiled with CFR 0.152.
 */
package jvnsensegmenter;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.FilenameFilter;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.StringTokenizer;
import jmaxent.Classification;
import jvnsensegmenter.FeatureGenerator;

/**
 * Sentence segmenter driven by a {@link Classification} model.
 *
 * <p>{@link FeatureGenerator#doFeatureGen} is asked for a list of character positions in the
 * text together with one data item per position; the classifier then labels every item, and a
 * line break is inserted after each position whose label equals {@link #positiveLabel}.
 * (Presumably the positions are candidate end-of-sentence marks such as '.', '!' or '?';
 * that is decided inside {@code FeatureGenerator} and should be confirmed there.)
 *
 * <p>The class can be used as a library ({@link #init(String)} followed by one of the
 * {@code senSegment} methods) or from the command line through {@link #main(String[])}.
 */
public class JVnSenSegmenter {
    /** Label that marks a position as a sentence boundary. */
    public static String positiveLabel = "y";

    /** Classifier used to label positions; {@code null} until {@link #init(String)} is called. */
    public Classification classifier = null;

    /**
     * Feature generator instance created by {@link #init(String)}. Segmentation itself goes
     * through the static {@code FeatureGenerator.doFeatureGen} method.
     */
    public FeatureGenerator feaGen = null;

    /**
     * Creates the classifier for the given model directory and initializes it.
     *
     * @param modelDir directory handed to the {@link Classification} constructor
     * @return {@code true} when initialization finished without an exception, {@code false}
     *         otherwise (the error message is printed to standard output)
     */
    public boolean init(String modelDir) {
        try {
            this.classifier = new Classification(modelDir);
            this.feaGen = new FeatureGenerator();
            this.classifier.init();
            return true;
        }
        catch (Exception e) {
            System.out.println("Error while initilizing classifier: " + e.getMessage());
            return false;
        }
    }

    /**
     * Segments a text into sentences.
     *
     * <p>Runs of tabs, spaces and newlines are first collapsed to their last character. For
     * every position reported by the feature generator whose label equals
     * {@link #positiveLabel}, the character at that position is emitted as
     * {@code " " + char + "\n"}; all other characters are copied unchanged.
     *
     * @param text text to segment; must not be {@code null}
     * @return the text with one sentence per line; when the feature generator reports no
     *         positions the whitespace-normalized text followed by a newline is returned
     */
    public String senSegment(String text) {
        // A repeated capturing group keeps only its last match, so each whitespace run
        // collapses to the final character of that run.
        String normalized = text.replaceAll("([\t \n])+", "$1");

        ArrayList markList = new ArrayList();
        List data = FeatureGenerator.doFeatureGen(new HashMap(), normalized, markList, false);
        if (markList.isEmpty()) {
            return normalized + "\n";
        }
        List labels = this.classifier.classify(data);

        // StringBuilder instead of repeated String concatenation: the old loop copied the
        // whole partial result for every mark.
        StringBuilder segmented = new StringBuilder(normalized.length() + 2 * markList.size());
        int copyFrom = 0;
        for (int i = 0; i < markList.size(); ++i) {
            int markPos = (Integer)markList.get(i);
            // Text between the previous mark (or the start) and this mark.
            segmented.append(normalized, copyFrom, markPos);

            char mark = normalized.charAt(markPos);
            String label = (String)labels.get(i);
            // Null-safe comparison: a missing label is treated as "not a boundary".
            if (label != null && label.equals(positiveLabel)) {
                segmented.append(' ').append(mark).append('\n');
            } else {
                segmented.append(mark);
            }
            copyFrom = markPos + 1;
        }
        // Remainder after the last mark.
        segmented.append(normalized, copyFrom, normalized.length());

        String result = segmented.toString();
        result = result.replace("\n ", "\n");      // drop the space that starts a new line
        result = result.replace("\n\n", "\n");     // merge doubled line breaks
        result = result.replaceAll("\\.\\. \\.", "...");  // re-join an ellipsis split as ".. ."
        return result;
    }

    /**
     * Segments a text and stores the sentences in the supplied list.
     *
     * @param text    text to segment
     * @param senList list that is cleared and then filled with one {@code String} per
     *                non-empty line of {@link #senSegment(String)}'s result
     */
    public void senSegment(String text, List senList) {
        senList.clear();
        String resultStr = this.senSegment(text);
        // StringTokenizer skips empty tokens, so blank lines never reach the list.
        StringTokenizer senTknr = new StringTokenizer(resultStr, "\n");
        while (senTknr.hasMoreTokens()) {
            senList.add(senTknr.nextToken());
        }
    }

    /**
     * Command-line entry point.
     *
     * <p>Expects exactly four arguments: {@code -modeldir <dir> -inputfile <file>} or
     * {@code -modeldir <dir> -inputdir <dir>}. Only the positions matter for the first pair:
     * {@code args[0]} is not inspected and {@code args[1]} is always taken as the model
     * directory. Output is written next to each input as {@code <input>.sent}. In directory
     * mode only files whose name ends with {@code .txt} are processed.
     *
     * @param args command-line arguments
     */
    public static void main(String[] args) {
        if (args.length != 4) {
            JVnSenSegmenter.displayHelp();
            System.exit(1);
        }
        try {
            JVnSenSegmenter senSegmenter = new JVnSenSegmenter();
            // Stop early when the model cannot be loaded; init has already printed the reason.
            if (!senSegmenter.init(args[1])) {
                System.exit(1);
            }
            String option = args[2];
            if (option.equalsIgnoreCase("-inputfile")) {
                JVnSenSegmenter.senSegmentFile(args[3], args[3] + ".sent", senSegmenter);
            } else if (option.equalsIgnoreCase("-inputdir")) {
                File inputDir = new File(args[3]);
                File[] children = inputDir.listFiles(new FilenameFilter(){

                    @Override
                    public boolean accept(File dir, String name) {
                        return name.endsWith(".txt");
                    }
                });
                // listFiles returns null when the path is not a directory or cannot be read.
                if (children == null) {
                    System.out.println("Cannot read input directory " + inputDir);
                    System.exit(1);
                }
                for (File child : children) {
                    System.out.println("Segmenting sentences in " + child);
                    JVnSenSegmenter.senSegmentFile(child.getPath(), child.getPath() + ".sent", senSegmenter);
                }
            } else {
                JVnSenSegmenter.displayHelp();
            }
        }
        catch (Exception e) {
            System.out.println(e.getMessage());
            return;
        }
    }

    /**
     * Segments one UTF-8 text file line by line and writes the result as UTF-8.
     *
     * <p>Empty input lines become empty output lines, lines starting with {@code '#'} are
     * copied verbatim, and every other line is segmented and followed by a blank line. The
     * accumulated output is trimmed before it is written. Errors are reported on standard
     * output and do not propagate to the caller.
     *
     * @param infile       path of the file to read
     * @param outfile      path of the file to write
     * @param senSegmenter initialized segmenter used for every non-comment line
     */
    private static void senSegmentFile(String infile, String outfile, JVnSenSegmenter senSegmenter) {
        // try-with-resources closes both streams even when reading or segmenting fails;
        // previously they were only closed on the success path.
        try (BufferedReader in = new BufferedReader(new InputStreamReader(new FileInputStream(infile), "UTF-8"));
             BufferedWriter out = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(outfile), "UTF-8"))) {
            StringBuilder text = new StringBuilder();
            String line;
            while ((line = in.readLine()) != null) {
                if (line.isEmpty()) {
                    text.append('\n');
                } else if (line.charAt(0) == '#') {
                    text.append(line).append('\n');
                } else {
                    text.append(senSegmenter.senSegment(line).trim()).append("\n\n");
                }
            }
            out.write(text.toString().trim());
            out.newLine();
        }
        catch (Exception e) {
            // Keep the original message prefix but also report the cause.
            System.out.println("Error in sensegment file " + infile + ": " + e);
        }
    }

    /** Prints command-line usage to standard output. */
    public static void displayHelp() {
        System.out.println("Usage:");
        System.out.println("\tCase 1: JVnSenSegmenter -modeldir <model directory> -inputfile <input data file>");
        System.out.println("\tCase 2: JVnSenSegmenter -modeldir <model directory> -inputdir <input data directory>");
        System.out.println("Where:");
        System.out.println("\t<model directory> is the directory contain the model and option files");
        System.out.println("\t<input data file> is the file containing input text that need to");
        System.out.println("\thave sentences segmented (each sentence on a line)");
        // The directory filter in main() accepts names ending in ".txt".
        System.out.println("\t<input data directory> is the directory containing multiple input .txt files");
        System.out.println();
    }
}

