/*
 * Decompiled with CFR 0.152.
 */
package edu.columbia.ob.gen.paraphraseMining;

import java.util.HashMap;
import java.util.HashSet;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.regex.Pattern;
import ob.util.Utils;

/**
 * Static helpers for paraphrase mining: text normalization, stop-word lookup
 * and construction of a title-to-type map for a domain corpus.
 *
 * <p>The stop-word sets are loaded lazily on first use and cached in static
 * fields. The lazy initialization is not synchronized.
 */
public class ParaphraseUtils {
    /** Cache for the list read from {@code config/stopwords-medium}; {@code null} until first use. */
    private static Set<String> _stopWords;
    /** Cache for the list read from {@code config/stopwords}; {@code null} until first use. */
    private static Set<String> _stopWordsBigList;
    /** Not referenced anywhere in this class; retained unchanged. */
    private static final boolean ARTICLE_LAST_WORD_ALIAS = false;

    /**
     * Upper-case placeholder tokens that {@link #normalizeText(String)} substitutes for
     * symbols and digit runs. They are added to both stop-word sets, so they never
     * survive normalization as standalone tokens.
     */
    private static final String[] PLACEHOLDER_TOKENS = {
        "QPQ", "QRQ", "QDQ", "QAQ", "QUQ", "QEQ", "QTQ", "QHQ", "QIQ", "QSQ", "QNQ"
    };

    /**
     * Literal {@code {target, replacement}} pairs applied in this exact order.
     * Order matters: {@code " & "} must be rewritten before the bare {@code "&"}.
     */
    private static final String[][] LITERAL_REPLACEMENTS = {
        {"'s ", " QPQ "},
        {"% ", " QRQ "},
        {" $", " QDQ "},
        {" & ", " and "},
        {"&", "QAQ"},
        {" + ", " QUQ "},
        {" = ", " QEQ "},
        {" @", " QTQ "},
        {" #", " QHQ "},
        {" ~", " QIQ "},
        {" / ", " QSQ "},
    };

    // Patterns are compiled once instead of on every normalizeText call.
    /** Sequences of the form {@code &name;} (for example {@code &amp;}). */
    private static final Pattern NAMED_ENTITY = Pattern.compile("&[a-z]+;");
    /** One or more non-letters, then {@code &}, then a run of non-whitespace. */
    private static final Pattern AMPERSAND_RUN = Pattern.compile("[^A-Za-z]+&[^\\s]+");
    /** Any character other than an ASCII letter, ASCII digit or hyphen. */
    private static final Pattern NON_TOKEN_CHAR = Pattern.compile("[^A-Za-z0-9\\-]");
    private static final Pattern DIGIT_RUN = Pattern.compile("\\d+");
    private static final Pattern DASHES_BEFORE_SPACE = Pattern.compile("\\-+\\s");
    private static final Pattern DASHES_AFTER_SPACE = Pattern.compile("\\s\\-+");
    private static final Pattern WHITESPACE_RUN = Pattern.compile("\\s+");
    private static final Pattern WHITESPACE = Pattern.compile("\\s");

    /**
     * Normalizes free text into a space-separated string of lower-case tokens
     * with stop words removed.
     *
     * <p>Steps, in order: trim and lower-case; blank out {@code &name;} sequences;
     * swap selected symbols for upper-case placeholder tokens; blank out every
     * character that is not an ASCII letter, digit or hyphen; replace digit runs
     * with the {@code QNQ} placeholder; drop hyphens that touch whitespace or the
     * ends of the text; collapse whitespace; finally drop every token for which
     * {@link #isStopWord(String)} returns {@code true} (which includes all
     * placeholders).
     *
     * @param text the text to normalize; must not be {@code null}
     * @return the remaining tokens joined by single spaces, or {@code ""} if none remain
     * @throws NullPointerException if {@code text} is {@code null}
     */
    public static String normalizeText(String text) {
        // Locale.ROOT keeps the mapping locale-independent; with the default locale a
        // Turkish/Azeri JVM maps 'I' to dotless 'ı', which the ASCII filter below would drop.
        text = text.trim().toLowerCase(Locale.ROOT);
        text = NAMED_ENTITY.matcher(text).replaceAll(" ");
        text = AMPERSAND_RUN.matcher(text).replaceAll(" ");
        for (String[] pair : LITERAL_REPLACEMENTS) {
            text = text.replace(pair[0], pair[1]);
        }
        text = NON_TOKEN_CHAR.matcher(text).replaceAll(" ");
        text = DIGIT_RUN.matcher(text).replaceAll(" QNQ ");
        text = DASHES_BEFORE_SPACE.matcher(text).replaceAll(" ");
        text = DASHES_AFTER_SPACE.matcher(text).replaceAll(" ");
        text = stripOuterDashes(text);
        text = WHITESPACE_RUN.matcher(text).replaceAll(" ").trim();

        StringBuilder kept = new StringBuilder();
        for (String token : WHITESPACE.split(text)) {
            if (!ParaphraseUtils.isStopWord(token)) {
                kept.append(' ').append(token);
            }
        }
        // Every kept token was prefixed with a space; drop the leading one.
        return kept.length() == 0 ? "" : kept.substring(1);
    }

    /** Removes all leading and then all trailing {@code '-'} characters in one pass. */
    private static String stripOuterDashes(String text) {
        int start = 0;
        int end = text.length();
        while (start < end && text.charAt(start) == '-') {
            ++start;
        }
        while (end > start && text.charAt(end - 1) == '-') {
            --end;
        }
        return text.substring(start, end);
    }

    /**
     * Reads the lines of {@code path} via {@code Utils.readLines} and adds the
     * placeholder tokens. The set is returned only once fully built, so a failure
     * while reading never leaves a partially filled set in a cache field.
     */
    private static Set<String> loadStopWords(String path) {
        Set<String> words = new HashSet<String>();
        words.addAll(Utils.readLines(path));
        for (String placeholder : PLACEHOLDER_TOKENS) {
            words.add(placeholder);
        }
        return words;
    }

    /**
     * Tests membership in the stop-word set built from {@code config/stopwords-medium}
     * plus the placeholder tokens. The set is loaded on the first call. The lookup is
     * an exact, case-sensitive match.
     *
     * @param string the token to look up
     * @return {@code true} if the token is in the set
     */
    public static boolean isStopWord(String string) {
        if (_stopWords == null) {
            _stopWords = loadStopWords("config/stopwords-medium");
        }
        return _stopWords.contains(string);
    }

    /**
     * Tests membership in the stop-word set built from {@code config/stopwords}
     * plus the placeholder tokens. The set is loaded on the first call. The lookup is
     * an exact, case-sensitive match.
     *
     * @param string the token to look up
     * @return {@code true} if the token is in the set
     */
    public static boolean isStopWordBigList(String string) {
        if (_stopWordsBigList == null) {
            _stopWordsBigList = loadStopWords("config/stopwords");
        }
        return _stopWordsBigList.contains(string);
    }

    /**
     * Builds a map that assigns {@code domainArticleType} to every line read from
     * {@code domainArticleListFile} via {@code Utils.readLinesDynamically}.
     *
     * <p>If a line contains {@code '('}, the key is the text before the first
     * {@code '('}, trimmed; otherwise the key is the line exactly as read
     * (not trimmed). Later duplicates overwrite earlier ones.
     *
     * @param domainArticleType the value stored for every key
     * @param domainArticleListFile the location passed to {@code Utils.readLinesDynamically}
     * @return the populated map, or {@code null} when {@code domainArticleType} is
     *         {@code null} (the file is not read in that case)
     */
    public static Map<String, String> getDomainCorpusTaxonomy(String domainArticleType, String domainArticleListFile) {
        if (domainArticleType == null) {
            // Kept for backward compatibility: callers may test for null.
            return null;
        }
        Map<String, String> dct = new HashMap<String, String>();
        for (String line : Utils.readLinesDynamically(domainArticleListFile)) {
            String key = line;
            int paren = line.indexOf('(');
            if (paren >= 0) {
                key = line.substring(0, paren).trim();
            }
            dct.put(key, domainArticleType);
        }
        return dct;
    }
}

