/*
 * Decompiled with CFR 0.152.
 */
package jvnsensegmenter;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.StringTokenizer;
import jvntextpro.util.StringUtils;

/**
 * Generates context-predicate feature records for sentence-boundary detection.
 *
 * <p>Every occurrence of {@code '.'}, {@code '!'} or {@code '?'} in a text (from index 1
 * onwards) is treated as a candidate boundary, and one space-separated record of features
 * ({@code "01=..."}, {@code "03"}, ...) describing the surrounding tokens is produced for it.
 * In labelled mode a trailing {@code " y"} / {@code " n"} tag is appended to every record.
 *
 * <p>The class can be used as a library through {@link #doFeatureGen} or from the command
 * line through {@link #main}.
 */
public class FeatureGenerator {
    /** Characters that delimit tokens. */
    private static final String WHITESPACE = " \t\n\r";

    /** Characters that mark a candidate sentence boundary. */
    private static final String BOUNDARY_MARKS = ".!?";

    /** Encoding used for every file read or written by this class. */
    private static final String ENCODING = "UTF-8";

    /** Regex of the bracket characters removed from the text in {@code -inputfile} mode. */
    private static final String FILE_MODE_BRACKETS = "[\\[\\]]";

    /**
     * Regex of the bracket characters removed from the text in {@code -inputdir} mode.
     * NOTE(review): this mode also strips curly braces while the file mode does not; the
     * difference is preserved from the original code - confirm whether it is intentional.
     */
    private static final String DIR_MODE_BRACKETS = "[\\[\\]{}]";

    /**
     * Command-line entry point.
     *
     * <p>Expects exactly three arguments: {@code -lbl} (any other value means unlabelled),
     * {@code -inputfile} or {@code -inputdir}, and the input path. Records are written, one
     * per line, to {@code <input path>.tagged} in UTF-8. The abbreviation map passed to the
     * generator is empty in this mode.
     *
     * @param args the command-line arguments
     */
    public static void main(String[] args) {
        if (args.length != 3) {
            FeatureGenerator.printUsage();
            System.exit(1);
        }
        // equalsIgnoreCase is locale independent, unlike toLowerCase().equals(...).
        boolean label = args[0].trim().equalsIgnoreCase("-lbl");
        String inputWhat = args[1].trim();
        try {
            if (inputWhat.equalsIgnoreCase("-inputfile")) {
                tagFile(args[2], label);
            } else if (inputWhat.equalsIgnoreCase("-inputdir")) {
                tagDirectory(args[2], label);
            } else {
                FeatureGenerator.printUsage();
            }
        } catch (Exception e) {
            // Print the exception itself: getMessage() may be null and hides the type.
            System.err.println("In feature generator main : " + e);
        }
    }

    /** Prints the command-line usage line to standard output. */
    public static void printUsage() {
        System.out.println("Usage: FeatureGeneration -lbl/-unlbl -inputfile/-inputdir [input file/input dir]");
    }

    /**
     * Loads an abbreviation list into {@code map}.
     *
     * <p>For every non-blank line the first whitespace-delimited token is lower-cased and
     * stored as both key and value. The file is decoded as UTF-8, matching the other reads
     * in this class, and is always closed.
     *
     * @param dataFile path of the abbreviation list
     * @param map      map that receives the lower-cased abbreviations
     * @throws IOException if the file cannot be opened or read
     */
    @SuppressWarnings({"rawtypes", "unchecked"}) // raw Map is part of the published signature
    public static void readAbbrList(String dataFile, Map map) throws IOException {
        try (BufferedReader fin = new BufferedReader(
                new InputStreamReader(new FileInputStream(dataFile), ENCODING))) {
            String line;
            while ((line = fin.readLine()) != null) {
                StringTokenizer strTok = new StringTokenizer(line, " \t\r\n");
                if (!strTok.hasMoreTokens()) {
                    continue;
                }
                String abbreviation = strTok.nextToken().toLowerCase();
                map.put(abbreviation, abbreviation);
            }
        }
    }

    /**
     * Produces one feature record per candidate sentence boundary in {@code text}.
     *
     * @param map      abbreviation lookup keyed by lower-cased token
     * @param text     the text to scan
     * @param markList cleared, then filled with the position ({@code Integer}) of every
     *                 boundary mark found; the scan starts at index 1, so a mark at index 0
     *                 is never reported
     * @param label    when {@code true}, each record gets {@code " y"} appended if the mark is
     *                 followed (after spaces/tabs) by a newline or the end of the text, and
     *                 {@code " n"} otherwise
     * @return the list of records ({@code String}), in the same order as {@code markList}
     */
    @SuppressWarnings({"rawtypes", "unchecked"}) // raw collections are part of the published signature
    public static List doFeatureGen(Map map, String text, List markList, boolean label) {
        markList.clear();
        int nextPos = 0;
        while ((nextPos = StringUtils.findFirstOf(text, BOUNDARY_MARKS, nextPos + 1)) != -1) {
            markList.add(Integer.valueOf(nextPos));
        }
        List<String> results = new ArrayList<String>(markList.size());
        for (Object mark : markList) {
            int curPos = (Integer) mark;
            String record = FeatureGenerator.genCPs(map, text, curPos);
            if (label) {
                int idx = StringUtils.findFirstNotOf(text, " \t", curPos + 1);
                boolean endsLine = idx == -1 || text.charAt(idx) == '\n';
                record = record + (endsLine ? " y" : " n");
            }
            results.add(record);
        }
        return results;
    }

    /** Runs {@code -inputfile} mode: tags one file into {@code <path>.tagged}. */
    @SuppressWarnings("rawtypes")
    private static void tagFile(String path, boolean label) throws IOException {
        String text = readNormalizedText(new File(path), FILE_MODE_BRACKETS);
        List records = FeatureGenerator.doFeatureGen(new HashMap(), text, new ArrayList(), label);
        try (BufferedWriter out = openWriter(new File(path + ".tagged"))) {
            writeRecords(out, records);
        }
    }

    /** Runs {@code -inputdir} mode: tags every regular file of a directory into {@code <dir>.tagged}. */
    @SuppressWarnings("rawtypes")
    private static void tagDirectory(String dirPath, boolean label) throws IOException {
        File outputFile = new File(dirPath + ".tagged");
        // List before creating the output so that a path such as "dir/" (output "dir/.tagged")
        // does not feed the freshly created output file back in as an input.
        File[] children = new File(dirPath).listFiles();
        if (children == null) {
            throw new IOException("Cannot list input directory: " + dirPath);
        }
        try (BufferedWriter out = openWriter(outputFile)) {
            for (File child : children) {
                // Sub-directories cannot be opened as files; an old output file is not input.
                if (!child.isFile() || child.equals(outputFile)) {
                    continue;
                }
                String text = readNormalizedText(child, DIR_MODE_BRACKETS);
                List records = FeatureGenerator.doFeatureGen(new HashMap(), text, new ArrayList(), label);
                writeRecords(out, records);
            }
        }
    }

    /** Opens {@code file} for writing in UTF-8, truncating any existing content. */
    private static BufferedWriter openWriter(File file) throws IOException {
        return new BufferedWriter(new OutputStreamWriter(new FileOutputStream(file), ENCODING));
    }

    /** Writes each record followed by a single {@code '\n'}. */
    @SuppressWarnings("rawtypes")
    private static void writeRecords(BufferedWriter out, List records) throws IOException {
        for (Object record : records) {
            out.write(record.toString());
            out.write("\n");
        }
    }

    /**
     * Reads a UTF-8 file and applies the clean-up steps used before feature generation:
     * lines are joined with {@code '\n'}, the result is trimmed, every run of whitespace is
     * collapsed to its last character, the characters matched by {@code bracketRegex} are
     * removed, and finally {@code <...>} tags are removed.
     */
    private static String readNormalizedText(File file, String bracketRegex) throws IOException {
        StringBuilder raw = new StringBuilder();
        try (BufferedReader in = new BufferedReader(
                new InputStreamReader(new FileInputStream(file), ENCODING))) {
            String line;
            while ((line = in.readLine()) != null) {
                raw.append('\n').append(line);
            }
        }
        String text = raw.toString().trim();
        text = text.replaceAll("([\t\n\r ])+", "$1");
        text = text.replaceAll(bracketRegex, "");
        text = text.replaceAll("<[^<>]*>", "");
        return text;
    }

    /**
     * Builds the feature record for the boundary mark at {@code position}.
     *
     * <p>Four strings are cut out of {@code text}: the whitespace-delimited token holding the
     * mark, the part of that token before the mark (prefix), the part after it (suffix), and
     * the neighbouring tokens on each side. Each contributes a fixed set of numbered features.
     *
     * <p>NOTE(review): ids 39 and 40 are each emitted with two meanings (39: next token
     * starts with a quote / token contains '@'; 40: next token is first-capitalised /
     * prefix length). This is kept as-is because changing ids would alter the feature
     * vocabulary - confirm against the trained model before touching it.
     *
     * @param map      abbreviation lookup keyed by lower-cased token
     * @param text     the full text
     * @param position index of the boundary mark in {@code text}
     * @return the space-separated feature record, trimmed
     */
    @SuppressWarnings("rawtypes")
    private static String genCPs(Map map, String text, int position) {
        // Index of the whitespace just before the token, or -1 when the token starts the
        // text. Keeping -1 (instead of clamping to 0) makes "left + 1" the real token start,
        // so the first character of the text is no longer dropped from token and prefix.
        int left = StringUtils.findLastOf(text, WHITESPACE, position);
        int right = StringUtils.findFirstOf(text, WHITESPACE, position + 1);
        if (right == -1) {
            right = text.length();
        }

        String token = text.substring(left + 1, right);
        String suffix = "";
        if (position + 1 < right) {
            suffix = text.substring(position + 1, right).trim();
        }
        String prefix = "";
        if (left + 1 < position) {
            prefix = text.substring(left + 1, position).trim();
        }

        // Previous token: only possible when something precedes the delimiter at "left".
        String preToken = "";
        if (left > 0) {
            int preEnd = StringUtils.findLastNotOf(text, WHITESPACE, left);
            int preStart = StringUtils.findLastOf(text, WHITESPACE, preEnd);
            if (preStart == -1) {
                preStart = 0;
            }
            if (preEnd != -1) {
                preToken = text.substring(preStart, preEnd + 1).trim();
            }
        }

        // Next token: only possible when the current token does not end the text.
        String nexToken = "";
        if (right != text.length()) {
            int nextStart = StringUtils.findFirstNotOf(text, WHITESPACE, right + 1);
            int nextEnd = StringUtils.findFirstOf(text, WHITESPACE, nextStart);
            if (nextEnd == -1) {
                nextEnd = text.length();
            }
            if (nextStart != -1) {
                nexToken = text.substring(nextStart, nextEnd).trim();
            }
        }

        StringBuilder cps = new StringBuilder();

        // Features of the token that contains the mark.
        cps.append(" 01=").append(token);
        cps.append(" 02=").append(token.toLowerCase());
        if (StringUtils.isFirstCap(token)) {
            cps.append(" 03");
        }
        if (map.containsKey(token.toLowerCase())) {
            cps.append(" 04");
        }
        if (StringUtils.containNumber(token)) {
            cps.append(" 05");
        }
        if (StringUtils.containLetter(token)) {
            cps.append(" 06");
        }
        if (StringUtils.containLetterAndDigit(token)) {
            cps.append(" 07");
        }
        if (StringUtils.isAllNumber(token)) {
            cps.append(" 08");
        }
        cps.append(" 09=").append(Integer.toString(StringUtils.countStops(token)));
        cps.append(" 10=").append(Integer.toString(StringUtils.countPuncs(token)));

        // Features of the text before and after the mark inside the token.
        cps.append(" 11=").append(prefix);
        cps.append(" 12=").append(prefix.toLowerCase());
        if (StringUtils.isFirstCap(prefix)) {
            cps.append(" 13");
        }
        cps.append(" 14=").append(suffix);
        cps.append(" 15=").append(suffix.toLowerCase());
        if (StringUtils.isFirstCap(suffix)) {
            cps.append(" 16");
        }

        // Features of the previous token. Compared by content, not by reference.
        if (!preToken.isEmpty()) {
            cps.append(" 17=").append(preToken);
            cps.append(" 18=").append(preToken.toLowerCase());
            if (StringUtils.isFirstCap(preToken)) {
                cps.append(" 19");
            }
            if (map.containsKey(preToken.toLowerCase())) {
                cps.append(" 20");
            }
            if (StringUtils.containNumber(preToken)) {
                cps.append(" 21");
            }
            if (StringUtils.containLetter(preToken)) {
                cps.append(" 22");
            }
            if (StringUtils.containLetterAndDigit(preToken)) {
                cps.append(" 23");
            }
            if (StringUtils.isAllNumber(preToken)) {
                cps.append(" 24");
            }
            cps.append(" 25=").append(Integer.toString(StringUtils.countStops(preToken)));
            cps.append(" 26=").append(Integer.toString(StringUtils.countPuncs(preToken)));
        } else {
            cps.append(" 27=null");
        }

        // Features of the next token.
        if (!nexToken.isEmpty()) {
            cps.append(" 28=").append(nexToken);
            cps.append(" 29=").append(nexToken.toLowerCase());
            if (StringUtils.isFirstCap(nexToken)) {
                cps.append(" 30");
            }
            if (map.containsKey(nexToken.toLowerCase())) {
                cps.append(" 31");
            }
            if (nexToken.startsWith("\"") || nexToken.startsWith("''") || nexToken.startsWith("``")
                    || nexToken.startsWith("'") || nexToken.startsWith("`")) {
                cps.append(" 39");
            }
            if (StringUtils.isFirstCap(nexToken)) {
                cps.append(" 40");
            }
            if (StringUtils.containNumber(nexToken)) {
                cps.append(" 32");
            }
            if (StringUtils.containLetter(nexToken)) {
                cps.append(" 33");
            }
            if (StringUtils.containLetterAndDigit(nexToken)) {
                cps.append(" 34");
            }
            if (StringUtils.isAllNumber(nexToken)) {
                cps.append(" 35");
            }
            cps.append(" 36=").append(Integer.toString(StringUtils.countStops(nexToken)));
            cps.append(" 37=").append(Integer.toString(StringUtils.countPuncs(nexToken)));
        } else {
            cps.append(" 38=null");
        }

        // Remaining token-level features.
        if (token.contains("@")) {
            cps.append(" 39");
        }
        cps.append(" 40=").append(prefix.length());
        cps.append(" 41=").append(suffix.length());
        if (token.contains("/")) {
            cps.append(" 42");
        }
        if (!nexToken.isEmpty()) {
            cps.append(" 43=").append(nexToken.charAt(0));
        }
        return cps.toString().trim();
    }
}

