/*
 * Decompiled with CFR 0.152.
 */
package edu.stanford.nlp.wordseg;

import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.sequences.SeqClassifierFlags;
import edu.stanford.nlp.trees.international.pennchinese.ChineseUtils;
import java.util.Iterator;
import java.util.List;
import java.util.concurrent.ConcurrentHashMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class ChineseStringUtils {
    // Debug switch; it is not read anywhere in the visible code of this class.
    private static final boolean DEBUG = false;
    // Matches a percent sign (full-width U+FF05 or ASCII '%') together with any
    // whitespace around it; group 1 captures the sign itself. Used by
    // BaseChinesePostProcessor.processPercents to isolate percent signs.
    private static final Pattern percentsPat = Pattern.compile("[\\s\\p{Zs}]*([\uff05%])[\\s\\p{Zs}]*");
    // Regex fragment: at least one whitespace character followed by a captured
    // percent sign. processPercents appends it to a number pattern to re-attach
    // a percent sign to the number that precedes it.
    private static final String percentStr = "[\\s\\p{Zs}]+([\uff05%])";
    // Shared post-processor instances; postProcessingAnswer(String, SeqClassifierFlags)
    // picks one according to the useHk / useAs / usePk / useMsr flags and falls
    // back to the CTP processor when none of them is set.
    private static final HKPostProcessor hkPostProcessor = new HKPostProcessor();
    private static final ASPostProcessor asPostProcessor = new ASPostProcessor();
    // Chosen when flags.useMsr is set: performs punctuation separation only.
    private static final BaseChinesePostProcessor basicPostsProcessor = new BaseChinesePostProcessor();
    private static final CTPPostProcessor ctpPostProcessor = new CTPPostProcessor();
    private static final PKPostProcessor pkPostProcessor = new PKPostProcessor();

    /** Private constructor: the visible API of this class is static, so it is not meant to be instantiated. */
    private ChineseStringUtils() {
    }

    /**
     * Tells whether a character is a letter within the 7-bit ASCII range.
     *
     * @param c the character to test
     * @return {@code true} only if {@code c} is at most U+007F and
     *         {@link Character#isLetter(char)} accepts it
     */
    public static boolean isLetterASCII(char c) {
        // Anything above the ASCII range is rejected before the letter check.
        if (c > 0x7F) {
            return false;
        }
        return Character.isLetter(c);
    }

    /**
     * Placeholder (U+1924) written into the working string wherever a word
     * boundary coincides with whitespace that was present in the input. It is
     * turned back into a plain space before the result is returned.
     */
    private static final char ORIGINAL_SPACE_MARKER = '\u1924';

    /**
     * Rebuilds a segmented sentence from per-character labels.
     *
     * <p>Each label contributes its {@code OriginalCharAnnotation}; a separator
     * is emitted in front of it when {@link #startsNewWord} says the character
     * begins a new word. If {@code flags.sighanPostProcessing} is set, the string
     * is additionally passed through the corpus-specific post-processor. When
     * {@code flags.keepAllWhitespaces} is also set, the markers for original
     * whitespace are left in place during that step so the post-processor sees
     * them as non-whitespace; otherwise they are converted to spaces first.
     *
     * @param doc   the labelled characters of one sentence, in order
     * @param flags segmenter options (whitespace handling, post-processing, corpus)
     * @return the sentence with words separated by single spaces
     */
    public static String combineSegmentedSentence(List<CoreLabel> doc, SeqClassifierFlags flags) {
        StringBuilder ans = new StringBuilder();
        CoreLabel prev = null;
        for (CoreLabel curr : doc) {
            boolean originalWhiteSpace = "1".equals(curr.get(CoreAnnotations.SpaceBeforeAnnotation.class));
            if (ChineseStringUtils.startsNewWord(prev, curr, originalWhiteSpace, flags)) {
                ans.append(originalWhiteSpace ? ORIGINAL_SPACE_MARKER : ' ');
            }
            ans.append(curr.get(CoreAnnotations.OriginalCharAnnotation.class));
            prev = curr;
        }

        String ansStr = ans.toString();
        if (flags.sighanPostProcessing) {
            if (!flags.keepAllWhitespaces) {
                ansStr = ansStr.replace(ORIGINAL_SPACE_MARKER, ' ');
            }
            ansStr = ChineseStringUtils.postProcessingAnswer(ansStr, flags);
        }
        // Whatever markers are left at this point become ordinary spaces.
        return ansStr.replace(ORIGINAL_SPACE_MARKER, ' ');
    }

    /**
     * Decides whether a separator goes in front of {@code curr}.
     *
     * @param prev               the preceding label, or {@code null} for the first character
     * @param curr               the label being appended
     * @param originalWhiteSpace whether {@code curr} has SpaceBeforeAnnotation "1"
     * @param flags              segmenter options
     * @return {@code true} if a separator must be written before {@code curr}
     */
    private static boolean startsNewWord(CoreLabel prev, CoreLabel curr, boolean originalWhiteSpace, SeqClassifierFlags flags) {
        boolean hasPrev = prev != null;
        // Constant-first comparison: a label without an answer is treated as "no boundary".
        boolean answerIsStart = "1".equals(curr.get(CoreAnnotations.AnswerAnnotation.class));
        boolean positionIsZero = "0".equals(String.valueOf(curr.get(CoreAnnotations.PositionAnnotation.class)));

        if (answerIsStart && !positionIsZero) {
            // The classifier marked a word start. With keepEnglishWhitespaces the
            // boundary is dropped between two ASCII letters that had no whitespace
            // between them in the input. (keepAllWhitespaces cannot change the
            // outcome here: the boundary is only ever dropped when there was no
            // original whitespace.)
            if (flags.keepEnglishWhitespaces && hasPrev) {
                char prevChar = ChineseStringUtils.originalChar(prev);
                char currChar = ChineseStringUtils.originalChar(curr);
                if (ChineseStringUtils.isLetterASCII(prevChar) && ChineseStringUtils.isLetterASCII(currChar) && !originalWhiteSpace) {
                    return false;
                }
            }
            return true;
        }

        boolean seg = false;
        if (hasPrev) {
            char prevChar = ChineseStringUtils.originalChar(prev);
            char currChar = ChineseStringUtils.originalChar(curr);
            // Force a boundary where ASCII meets non-ASCII, unless both sides are
            // numbers or the feature is switched off.
            boolean asciiMismatch = (prevChar < 0x80) != (currChar < 0x80);
            if (asciiMismatch && !(ChineseUtils.isNumber(prevChar) && ChineseUtils.isNumber(currChar)) && flags.separateASCIIandRange) {
                seg = true;
            }
            // Keep input whitespace between ASCII letters, or between an ASCII
            // letter and a number.
            if (flags.keepEnglishWhitespaces && originalWhiteSpace) {
                boolean prevLetter = ChineseStringUtils.isLetterASCII(prevChar);
                boolean currLetter = ChineseStringUtils.isLetterASCII(currChar);
                if (prevLetter && currLetter || prevLetter && ChineseUtils.isNumber(currChar) || ChineseUtils.isNumber(prevChar) && currLetter) {
                    seg = true;
                }
            }
        }
        // With keepAllWhitespaces every input whitespace survives, except in
        // front of a label whose position is "0".
        if (flags.keepAllWhitespaces && !positionIsZero && originalWhiteSpace) {
            seg = true;
        }
        return seg;
    }

    /** Returns the first character of the label's OriginalCharAnnotation. */
    private static char originalChar(CoreLabel label) {
        return label.get(CoreAnnotations.OriginalCharAnnotation.class).charAt(0);
    }

    /**
     * Routes the segmented string to the post-processor selected by the corpus
     * flags. The flags are checked in the order useHk, useAs, usePk, useMsr; the
     * CTP processor is used when none of them is set.
     *
     * @param ans   the segmented sentence
     * @param flags segmenter options
     * @return the post-processed sentence
     */
    private static String postProcessingAnswer(String ans, SeqClassifierFlags flags) {
        final String processed;
        if (flags.useHk) {
            processed = hkPostProcessor.postProcessingAnswer(ans);
        } else if (flags.useAs) {
            processed = asPostProcessor.postProcessingAnswer(ans);
        } else if (flags.usePk) {
            processed = pkPostProcessor.postProcessingAnswer(ans, flags.keepAllWhitespaces);
        } else if (flags.useMsr) {
            processed = basicPostsProcessor.postProcessingAnswer(ans);
        } else {
            processed = ctpPostProcessor.postProcessingAnswer(ans, flags.suppressMidDotPostprocessing);
        }
        return processed;
    }

    /**
     * Base post-processor: puts spaces around a configurable set of punctuation
     * marks and offers helpers that subclasses combine to normalise colons,
     * percent signs, dots and commas next to numbers.
     *
     * <p>Compiled patterns are shared through {@link #patternMap}, keyed by their
     * regex source. Instances keep no per-call state besides {@link #puncs}.
     */
    static class BaseChinesePostProcessor {
        /** Cache of compiled patterns, keyed by regex source; shared by all processors. */
        protected static final ConcurrentHashMap<String, Pattern> patternMap = new ConcurrentHashMap<>();
        /** Regex character class for one whitespace character (including Unicode space separators). */
        private static final String WHITE = "[\\s\\p{Zs}]";
        /** Colon variants handled by {@link #processColons}: small colon, ASCII colon, full-width colon. */
        private static final Character[] COLONS = {'\ufe55', ':', '\uff1a'};
        /** Punctuation marks that {@link #separatePuncs} surrounds with spaces; subclasses may replace it. */
        protected Character[] puncs;

        public BaseChinesePostProcessor() {
            this.puncs = new Character[]{'\u3001', '\u3002', '\u3003', '\u3008', '\u3009', '\u300a', '\u300b', '\u300c', '\u300d', '\u300e', '\u300f', '\u3010', '\u3011', '\u3014', '\u3015'};
        }

        /**
         * Post-processes a segmented sentence; the base version only separates punctuation.
         *
         * @param ans the segmented sentence
         * @return the sentence with each configured punctuation mark set off by spaces
         */
        public String postProcessingAnswer(String ans) {
            return this.separatePuncs(ans);
        }

        /**
         * Replaces every configured punctuation mark, along with any whitespace
         * touching it, by the mark surrounded by single spaces, then trims.
         */
        String separatePuncs(String ans) {
            for (Character punc : this.puncs) {
                Pattern p = cachedPattern(getEscapedPuncPattern(punc));
                // quoteReplacement keeps '$' or a backslash literal should a subclass add one.
                ans = p.matcher(ans).replaceAll(Matcher.quoteReplacement(" " + punc + " "));
            }
            return ans.trim();
        }

        /** Looks up the compiled form of {@code regex}, compiling and caching it on first use. */
        private static Pattern cachedPattern(String regex) {
            return patternMap.computeIfAbsent(regex, Pattern::compile);
        }

        /**
         * Builds the regex for a punctuation mark with optional whitespace on both
         * sides. The mark is quoted so regex metacharacters are matched literally.
         */
        private static String getEscapedPuncPattern(Character punc) {
            return WHITE + "*" + Pattern.quote(String.valueOf(punc)) + WHITE + "*";
        }

        /** Applies {@code replacement} for {@code p} repeatedly until {@code p} no longer matches. */
        private static String replaceUntilStable(Pattern p, String text, String replacement) {
            Matcher m = p.matcher(text);
            while (m.find()) {
                text = m.replaceAll(replacement);
                m = p.matcher(text);
            }
            return text;
        }

        /**
         * Sets every colon variant off with spaces, then re-joins a colon to its
         * neighbours when both neighbours match {@code numPat}.
         *
         * @param ans    the sentence to process
         * @param numPat regex describing a number; it is looked up on every call,
         *               so different values may be used on the same instance
         * @return the processed, trimmed sentence
         */
        protected String processColons(String ans, String numPat) {
            for (Character colon : COLONS) {
                ans = cachedPattern(WHITE + "*" + colon + WHITE + "*").matcher(ans).replaceAll(" " + colon + " ");
            }
            for (Character colon : COLONS) {
                Pattern p = cachedPattern("(" + numPat + ")" + WHITE + "+" + colon + WHITE + "+" + "(" + numPat + ")");
                // Repeated because a number can sit between two colons, and one pass
                // cannot reuse it for both matches.
                ans = replaceUntilStable(p, ans, "$1" + colon + "$2");
            }
            return ans.trim();
        }

        /**
         * Sets percent signs off with spaces, then re-attaches a percent sign to a
         * preceding match of {@code numPat}.
         *
         * @param ans    the sentence to process
         * @param numPat regex describing a number
         * @return the processed, trimmed sentence
         */
        protected String processPercents(String ans, String numPat) {
            ans = percentsPat.matcher(ans).replaceAll(" $1 ");
            Pattern numberThenPercent = cachedPattern("(" + numPat + ")" + ChineseStringUtils.percentStr);
            return numberThenPercent.matcher(ans).replaceAll("$1$2").trim();
        }

        /**
         * Removes whitespace between a dot (small full stop, hyphenation point,
         * full-width full stop or ASCII '.') and the numbers on both sides of it.
         * Three layouts are handled in turn: whitespace on both sides of the dot,
         * only after it, and only before it.
         *
         * @param ans    the sentence to process
         * @param numPat regex describing a number
         * @return the processed, trimmed sentence
         */
        protected static String processDots(String ans, String numPat) {
            String dots = "[\ufe52\u2027\uff0e.]";
            String num = "(" + numPat + ")";
            String gap = WHITE + "+";
            ans = replaceUntilStable(cachedPattern(num + gap + "(" + dots + ")" + gap + num), ans, "$1$2$3");
            ans = replaceUntilStable(cachedPattern("(" + numPat + ")(" + dots + ")" + gap + num), ans, "$1$2$3");
            ans = replaceUntilStable(cachedPattern(num + gap + "(" + dots + ")(" + numPat + ")"), ans, "$1$2$3");
            return ans.trim();
        }

        /**
         * Removes all whitespace directly before and directly after {@code punc}.
         *
         * @param punc the punctuation mark; matched literally
         * @param ans  the sentence to process
         * @return the processed, trimmed sentence
         */
        protected static String gluePunc(Character punc, String ans) {
            String literal = Pattern.quote(String.valueOf(punc));
            String replacement = Matcher.quoteReplacement(String.valueOf(punc));
            ans = cachedPattern(WHITE + "*" + literal).matcher(ans).replaceAll(replacement);
            ans = cachedPattern(literal + WHITE + "*").matcher(ans).replaceAll(replacement);
            return ans.trim();
        }

        /**
         * Sets ASCII commas off with spaces, then re-joins a comma that sits
         * between a digit and a group of exactly three digits followed by a
         * non-digit character. The trailing non-digit is part of the match, so a
         * three-digit group at the very end of the string is not re-joined.
         *
         * @param ans the sentence to process
         * @return the processed, trimmed sentence
         */
        protected static String processCommas(String ans) {
            String numPat = "[0-9\uff10-\uff19]";
            String nonNumPat = "[^0-9\uff10-\uff19]";
            ans = ans.replace(",", " , ");
            // Collapses the double spaces created when a comma already had a space beside it.
            ans = ans.replace("  ", " ");
            Pattern p = cachedPattern("(" + numPat + ")" + WHITE + "*" + "(,)" + WHITE + "*" + "(" + numPat + "{3}" + nonNumPat + ")");
            return p.matcher(ans).replaceAll("$1$2$3").trim();
        }
    }

    /**
     * Post-processor selected when {@code flags.useHk} is set. It separates
     * punctuation (the base set plus U+2103, the degree-Celsius sign), re-joins
     * colons between ASCII digit runs, and normalises triple em-dashes and
     * double ellipses.
     */
    static class HKPostProcessor extends BaseChinesePostProcessor {
        public HKPostProcessor() {
            this.puncs = new Character[]{
                '\u3001', '\u3002', '\u3003', '\u3008', '\u3009', '\u300a', '\u300b', '\u300c',
                '\u300d', '\u300e', '\u300f', '\u3010', '\u3011', '\u3014', '\u3015', '\u2103'
            };
        }

        /**
         * @param ans the segmented sentence
         * @return the sentence after punctuation, colon and dash/ellipsis normalisation, trimmed
         */
        @Override
        public String postProcessingAnswer(String ans) {
            String text = this.processColons(this.separatePuncs(ans), "[0-9]+");
            // Three em-dashes, possibly split by whitespace, become one spaced token.
            text = HKPostProcessor.isolateRun(text, "\u2014[\\s\\p{Zs}]*\u2014[\\s\\p{Zs}]*\u2014", "\u2014\u2014\u2014");
            // Likewise for two ellipsis characters.
            text = HKPostProcessor.isolateRun(text, "\u2026[\\s\\p{Zs}]*\u2026", "\u2026\u2026");
            return text.trim();
        }

        /** Replaces each match of {@code runRegex}, plus surrounding whitespace, by {@code canonical} between single spaces. */
        private static String isolateRun(String text, String runRegex, String canonical) {
            Pattern compiled = patternMap.computeIfAbsent("[\\s\\p{Zs}]*" + runRegex + "[\\s\\p{Zs}]*", s -> Pattern.compile(s));
            return compiled.matcher(text).replaceAll(" " + canonical + " ");
        }
    }

    /**
     * Post-processor selected when {@code flags.useAs} is set. It keeps the base
     * punctuation set and treats full-width digits and the Chinese numerals
     * listed in its number pattern as numbers.
     */
    static class ASPostProcessor extends BaseChinesePostProcessor {
        ASPostProcessor() {
        }

        /**
         * Runs punctuation separation, then colon, percent, dot and comma handling, in that order.
         *
         * @param ans the segmented sentence
         * @return the post-processed sentence
         */
        @Override
        public String postProcessingAnswer(String ans) {
            final String number = "[\uff10-\uff19\u4e00\u4e8c\u4e09\u56db\u4e94\u516d\u4e03\u516b\u4e5d\u5341\u767e\u5343]+";
            String text = this.separatePuncs(ans);
            text = this.processColons(text, number);
            text = this.processPercents(text, number);
            text = processDots(text, number);
            return processCommas(text);
        }
    }

    /**
     * Default post-processor, used when none of the useHk / useAs / usePk /
     * useMsr flags is set. On top of the base punctuation set it separates
     * parentheses, the ASCII double quote and angle brackets.
     */
    static class CTPPostProcessor extends BaseChinesePostProcessor {
        public CTPPostProcessor() {
            this.puncs = new Character[]{
                '\u3001', '\u3002', '\u3003', '\u3008', '\u3009', '\u300a', '\u300b', '\u300c',
                '\u300d', '\u300e', '\u300f', '\u3010', '\u3011', '\u3014', '\u3015',
                '(', ')', '"', '<', '>'
            };
        }

        /** Post-processes with middle-dot handling suppressed. */
        @Override
        public String postProcessingAnswer(String ans) {
            return this.postProcessingAnswer(ans, true);
        }

        /**
         * @param ans                          the segmented sentence
         * @param suppressMidDotPostprocessing when false, whitespace around the
         *                                     katakana middle dot (U+30FB) is removed
         * @return the post-processed, trimmed sentence
         */
        public String postProcessingAnswer(String ans, Boolean suppressMidDotPostprocessing) {
            final String number = "[0-9\uff10-\uff19]+";
            String text = this.separatePuncs(ans);
            boolean glueMidDot = !suppressMidDotPostprocessing.booleanValue();
            if (glueMidDot) {
                text = gluePunc(Character.valueOf('\u30fb'), text);
            }
            text = this.processColons(text, number);
            text = this.processPercents(text, number);
            text = processDots(text, number);
            text = processCommas(text);
            return text.trim();
        }
    }

    /**
     * Post-processor selected when {@code flags.usePk} is set. It keeps the base
     * punctuation set; the number-related clean-up runs only when original
     * whitespace does not have to be preserved.
     */
    static class PKPostProcessor extends BaseChinesePostProcessor {
        PKPostProcessor() {
        }

        /** Post-processes as if all whitespace had to be kept, i.e. punctuation separation only. */
        @Override
        public String postProcessingAnswer(String ans) {
            return this.postProcessingAnswer(ans, true);
        }

        /**
         * @param ans                the segmented sentence
         * @param keepAllWhitespaces when true, only punctuation separation is
         *                           applied; when false, colons, percents, dots,
         *                           commas, triple em-dashes and double ellipses
         *                           are normalised as well
         * @return the post-processed, trimmed sentence
         */
        public String postProcessingAnswer(String ans, Boolean keepAllWhitespaces) {
            String text = this.separatePuncs(ans);
            if (keepAllWhitespaces.booleanValue()) {
                return text.trim();
            }
            final String number = "[0-9\uff10-\uff19\uff0e\u00b7\u4e00\u5341\u767e]+";
            text = this.processColons(text, number);
            text = this.processPercents(text, number);
            text = processDots(text, number);
            text = processCommas(text);
            // Three em-dashes, possibly split by whitespace, become one spaced token.
            text = PKPostProcessor.isolateRun(text, "\u2014[\\s\\p{Zs}]*\u2014[\\s\\p{Zs}]*\u2014", "\u2014\u2014\u2014");
            // Likewise for two ellipsis characters.
            text = PKPostProcessor.isolateRun(text, "\u2026[\\s\\p{Zs}]*\u2026", "\u2026\u2026");
            return text.trim();
        }

        /** Replaces each match of {@code runRegex}, plus surrounding whitespace, by {@code canonical} between single spaces. */
        private static String isolateRun(String text, String runRegex, String canonical) {
            Pattern compiled = patternMap.computeIfAbsent("[\\s\\p{Zs}]*" + runRegex + "[\\s\\p{Zs}]*", s -> Pattern.compile(s));
            return compiled.matcher(text).replaceAll(" " + canonical + " ");
        }
    }
}

