/*
 * Decompiled with CFR 0.152.
 */
package edu.columbia.ob.gen.paraphraseMining;

import edu.columbia.ob.gen.paraphraseMining.DbpediaWordNetTaxonomy;
import edu.columbia.ob.gen.paraphraseMining.ParaphraseUtils;
import edu.columbia.ob.gen.paraphraseMining.Taxonomy;
import edu.columbia.ob.gen.paraphraseMining.Vectors;
import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.regex.Pattern;
import ob.util.Counts;
import ob.util.Utils;

/**
 * Command-line tool that reads sentences containing {@code [[entity]]} /
 * {@code [[type|entity]]} markup, assigns a single taxonomy type to every
 * entity, rewrites each sentence into a template in which the entities are
 * replaced by their types, and groups the templates by the sorted list of
 * types they contain. Groups with more than one template are written to the
 * output file (tab separated) and to {@code <output>.pretty} (indented).
 *
 * <p>All state is held in static fields that are initialised by
 * {@link #main(String[])}; the class is not designed for concurrent use.
 */
public class MatchSentences {
    /** Vectors file used when the {@link #VECTORS_FILE_PROPERTY} system property is not set. */
    private static final String DEFAULT_VECTORS_FILE = "C:/Users/Or/Desktop/wp/vectors.culled.real10.tfidf";

    /** Name of the system property that overrides {@link #DEFAULT_VECTORS_FILE}. */
    private static final String VECTORS_FILE_PROPERTY = "paraphraseMining.vectorsFile";

    /**
     * Matches a (possibly negative) number with optional '.'/',' separators,
     * optionally followed by a {@code _thousand}, {@code _million},
     * {@code _billion} or {@code _trillion} suffix. Compiled once instead of
     * on every call.
     */
    private static final Pattern NUMBER_PATTERN =
            Pattern.compile("\\-?\\d*([\\.\\,]?\\d+)+(_(thousand|million|billion|trillion))?");

    /**
     * Type identifiers that are never collected as types of a sense.
     * NOTE(review): these look like WordNet synset offsets of very generic
     * concepts - confirm against the taxonomy data.
     */
    private static final Set<String> IGNORED_TYPES = new HashSet<String>(Arrays.asList(
            "00001740", "00002119", "00002236", "00001930", "00003122",
            "00007127", "00004359", "00004576", "00031433", "00029714",
            "13401506", "00002533", "00002819", "00002929", "00003037",
            "00003991", "00018635", "00019102", "00020846", "00021810",
            "00022007", "00022178", "00032028", "06264091", "13401274",
            "00023153", "04668449", "00023591", "05737450", "07011460",
            "13741363", "03536069", "00026552", "09251280", "14387488",
            "07843441", "05077585", "07842951", "05631017", "07889030"));

    /** Input sentences, one per line of the entities file. */
    private static List<String> _sentences;

    /** Taxonomy used to look up senses, types and words. */
    private static Taxonomy _taxonomy;

    /**
     * When true, templates keep the entity next to its type
     * ({@code [[type|entity]]}) and entities shared by a whole cluster are
     * substituted back into the templates. Disabled by default.
     */
    private static boolean _modifyFixedEntities = false;

    /** Lazily loaded word vectors; see {@link #getVectors()}. */
    private static Vectors _vectors;

    /**
     * Entry point.
     *
     * @param args entities (input) file, output file, domain article type,
     *             domain article list file
     * @throws Exception if reading the input or writing the output fails
     */
    public static void main(String[] args) throws Exception {
        // NOTE(review): the message labels the last two arguments "optional",
        // but exactly four arguments are required here - confirm which is intended.
        if (args.length != 4) {
            throw new RuntimeException("expected four arguments: entities (input) file, output file, domain article type (optional) and domain article list file (optional)");
        }
        String entitiesFile = args[0];
        String outfile = args[1];
        String prettyOutfile = outfile + ".pretty";
        String domainArticleType = args[2];
        String domainArticleListFile = args[3];

        _sentences = Utils.readLines(entitiesFile);
        _taxonomy = new DbpediaWordNetTaxonomy(MatchSentences.getCorpusEntities(_sentences), ParaphraseUtils.getDomainCorpusTaxonomy(domainArticleType, domainArticleListFile));
        System.out.println("loaded taxonomy");

        // Cluster id (sorted list of types, as a string) -> sorted set of templates.
        Map<String, Collection<String>> clusters = new TreeMap<String, Collection<String>>();
        for (String sentence : _sentences) {
            Map<String, String> entityTypes = MatchSentences.chooseSingleTypes(sentence);
            if (entityTypes == null) {
                continue;
            }
            // Must run before the size check: it removes from entityTypes the
            // entities it substitutes directly into the sentence.
            String modifiedSentence = MatchSentences.modifySentenceWithTypes(sentence, entityTypes, _modifyFixedEntities);
            if (entityTypes.size() < 2) {
                continue;
            }
            List<String> types = new ArrayList<String>(entityTypes.values());
            Collections.sort(types);
            String id = types.toString();
            Collection<String> clusterSentences = clusters.get(id);
            if (clusterSentences == null) {
                clusterSentences = new TreeSet<String>();
                clusters.put(id, clusterSentences);
            }
            clusterSentences.add(modifiedSentence);
        }
        System.out.println("Done with grouping");

        if (_modifyFixedEntities) {
            Map<String, Collection<String>> modClusters = new TreeMap<String, Collection<String>>();
            Set<String> toRemove = new HashSet<String>();
            for (Map.Entry<String, Collection<String>> entry : clusters.entrySet()) {
                Collection<String> original = entry.getValue();
                Collection<String> cluster = MatchSentences.modifyClusterWithFixedEntity(original);
                // The very same instance is returned when nothing was changed.
                if (cluster == original) {
                    continue;
                }
                toRemove.add(entry.getKey());
                // Re-key the cluster by the types that remain in its templates.
                Map<String, String> typesToEntities = MatchSentences.getTemplateTypesToEntities(cluster.iterator().next());
                List<String> types = new ArrayList<String>(typesToEntities.keySet());
                Collections.sort(types);
                modClusters.put(types.toString(), cluster);
            }
            for (String id : toRemove) {
                clusters.remove(id);
            }
            clusters.putAll(modClusters);
        }

        MatchSentences.writeClusters(outfile, clusters, false);
        MatchSentences.writeClusters(prettyOutfile, clusters, true);
    }

    /**
     * Writes every cluster that has more than one template to {@code path}.
     *
     * @param path     file to (over)write
     * @param clusters cluster id to templates
     * @param pretty   if true, the id and each template go on their own line
     *                 (templates indented by two spaces) followed by a blank
     *                 line; otherwise one line per cluster, tab separated
     * @throws IOException if the file cannot be opened for writing
     */
    private static void writeClusters(String path, Map<String, Collection<String>> clusters, boolean pretty) throws IOException {
        PrintWriter pw = new PrintWriter(new FileWriter(path));
        try {
            for (Map.Entry<String, Collection<String>> entry : clusters.entrySet()) {
                Collection<String> templates = entry.getValue();
                if (templates.size() <= 1) {
                    continue;
                }
                if (pretty) {
                    pw.println(entry.getKey());
                    for (String template : templates) {
                        pw.println("  " + template);
                    }
                } else {
                    pw.print(entry.getKey());
                    for (String template : templates) {
                        pw.print("\t" + template);
                    }
                }
                pw.println();
            }
        } finally {
            // Close even when writing fails so the file handle is not leaked.
            pw.close();
        }
    }

    /**
     * Substitutes the entities that are identical across all templates of a
     * cluster back into the templates.
     *
     * @param cluster templates containing {@code [[type|entity]]} placeholders
     * @return {@code cluster} itself (same instance) when it is empty or no
     *         type maps to the same entity in every template; otherwise a new
     *         sorted set of rewritten templates
     */
    private static Collection<String> modifyClusterWithFixedEntity(Collection<String> cluster) {
        Map<String, String> commonEntities = null;
        for (String template : cluster) {
            Map<String, String> typesToEntities = MatchSentences.getTemplateTypesToEntities(template);
            if (commonEntities == null) {
                commonEntities = new HashMap<String, String>(typesToEntities);
            } else {
                for (Map.Entry<String, String> entry : typesToEntities.entrySet()) {
                    String common = commonEntities.get(entry.getKey());
                    // A type that was already dropped (or never present) has no
                    // common entity left; dereferencing it used to throw a
                    // NullPointerException.
                    if (common != null && !common.equals(entry.getValue())) {
                        commonEntities.remove(entry.getKey());
                    }
                }
            }
            if (commonEntities.isEmpty()) {
                return cluster;
            }
        }
        if (commonEntities == null) {
            // Empty cluster: nothing to modify.
            return cluster;
        }
        Collection<String> modCluster = new TreeSet<String>();
        for (String template : cluster) {
            String modified = template;
            for (Map.Entry<String, String> common : commonEntities.entrySet()) {
                String type = common.getKey();
                String entity = common.getValue();
                // Placeholders parsed by getTemplateTypesToEntities have the
                // form [[type|entity]], so that form has to be replaced; the
                // bare [[type]] form is still handled as before.
                modified = modified.replace("[[" + type + "|" + entity + "]]", entity);
                modified = modified.replace("[[" + type + "]]", entity);
            }
            modCluster.add(modified);
        }
        return modCluster;
    }

    /**
     * Extracts the {@code type -> entity} pairs of all
     * {@code [[type|entity]]} placeholders in a template. Placeholders
     * without a {@code |entity} part are skipped, and scanning stops at an
     * unterminated {@code [[}.
     *
     * @param template template text
     * @return map from type to entity; later placeholders with the same type
     *         overwrite earlier ones
     */
    private static Map<String, String> getTemplateTypesToEntities(String template) {
        Map<String, String> typesToEntities = new HashMap<String, String>();
        int open = template.indexOf("[[");
        // Test the raw indexOf result: the previous code added 2 before
        // comparing with -1, so its loop could only end with an exception.
        while (open != -1) {
            int close = template.indexOf("]]", open + 2);
            if (close == -1) {
                break;
            }
            String[] typeAndEntity = template.substring(open + 2, close).split("\\|");
            if (typeAndEntity.length >= 2) {
                typesToEntities.put(typeAndEntity[0], typeAndEntity[1]);
            }
            open = template.indexOf("[[", close + 2);
        }
        return typesToEntities;
    }

    /**
     * Chooses one type label for every distinct entity of a sentence.
     *
     * <p>The label has the form {@code "<type> (<word>) <n>"}, where
     * {@code n} is the running count of that type within the sentence.
     * Entities that already carry a type in the markup get the label
     * {@code "N/A (<type>) <n>"}.
     *
     * @param sentence sentence with entity markup
     * @return entity name to label, in order of first appearance, or
     *         {@code null} if no types were found for some entity
     */
    private static Map<String, String> chooseSingleTypes(String sentence) {
        Map<String, String> entityTypes = new LinkedHashMap<String, String>();
        List<Entity> entities = MatchSentences.getEntities(sentence);
        Counts<String> typeCounts = new Counts<String>();
        for (int i = 0; i < entities.size(); i++) {
            Entity entity = entities.get(i);
            String name = entity.getName();
            String givenType = entity.getType();
            if (givenType != null) {
                typeCounts.add(givenType);
                entityTypes.put(name, "N/A (" + givenType + ") " + typeCounts.getCount(givenType));
            } else if (!entityTypes.containsKey(name)) {
                String context = MatchSentences.getEntityContext(i, sentence);
                Collection<String> typesForEntity = MatchSentences.getTypesForBestSense(name, context);
                if (typesForEntity == null || typesForEntity.isEmpty()) {
                    System.out.println("no types for entity: " + name);
                    return null;
                }
                String type = MatchSentences.chooseBestType(typesForEntity, context);
                typeCounts.add(type);
                entityTypes.put(name, type + " (" + MatchSentences.makeTypeWord(type) + ") " + typeCounts.getCount(type));
            }
        }
        return entityTypes;
    }

    /**
     * Rewrites the entity markup of a sentence using the chosen type labels.
     *
     * <p>An entity whose label's first token equals one of the entity's own
     * senses (see {@link #isSame}) is replaced, brackets included, by the
     * capitalised word of its label and <em>removed from
     * {@code entityTypes}</em>. Every other entity keeps its brackets and has
     * its content replaced by the label, followed by {@code |name} when
     * {@code modifyFixedEntities} is set.
     *
     * @param sentence            sentence with entity markup
     * @param entityTypes         entity name to label; modified in place
     * @param modifyFixedEntities whether to keep the entity name in the placeholder
     * @return the rewritten sentence
     * @throws IllegalStateException if an entity has no label and was not removed earlier
     */
    private static String modifySentenceWithTypes(String sentence, Map<String, String> entityTypes, boolean modifyFixedEntities) {
        int startIndex = 0;
        List<Entity> entities = MatchSentences.getEntities(sentence);
        boolean entityChosen = false;
        // Entity name -> text it was replaced with, for repeated mentions.
        Map<String, String> removedEntities = new HashMap<String, String>();
        for (Entity entity : entities) {
            String name = entity.getName();
            String type = entityTypes.get(name);
            boolean alreadyRemoved = removedEntities.containsKey(name);
            if (type == null && !alreadyRemoved) {
                throw new IllegalStateException("no type chosen for entity '" + name + "' in sentence: " + sentence);
            }
            if (alreadyRemoved || MatchSentences.isSame(entity, type)) {
                startIndex = sentence.indexOf("[[", startIndex);
                int endIndex = sentence.indexOf("]]", startIndex) + 2;
                String entityForSentence = alreadyRemoved
                        ? removedEntities.get(name)
                        : MatchSentences.ucFirsts(type.substring(type.indexOf("(") + 1, type.indexOf(")")));
                sentence = sentence.substring(0, startIndex) + entityForSentence + sentence.substring(endIndex);
                entityChosen = true;
                if (!alreadyRemoved) {
                    entityTypes.remove(name);
                    removedEntities.put(name, entityForSentence);
                }
            } else {
                // Keep the brackets; only the text between them is replaced.
                startIndex = sentence.indexOf("[[", startIndex) + 2;
                int endIndex = sentence.indexOf("]]", startIndex);
                sentence = sentence.substring(0, startIndex) + type + (modifyFixedEntities ? "|" + name : "") + sentence.substring(endIndex);
            }
        }
        if (entityChosen) {
            System.out.println("ENTITY CHOSEN: " + sentence);
        }
        return sentence;
    }

    /**
     * Replaces underscores by spaces and upper-cases the first character of
     * every word. Words are separated by whitespace in the input and by a
     * single space in the result; empty words (leading separators, empty
     * input) are skipped instead of causing an exception.
     *
     * @param string text to capitalise
     * @return the capitalised text, or the empty string if there are no words
     */
    private static String ucFirsts(String string) {
        StringBuilder sb = new StringBuilder();
        for (String word : string.replace("_", " ").split("\\s+")) {
            if (word.length() == 0) {
                continue;
            }
            if (sb.length() > 0) {
                sb.append(' ');
            }
            sb.append(word.substring(0, 1).toUpperCase(Locale.ROOT)).append(word.substring(1));
        }
        return sb.toString();
    }

    /**
     * Tells whether the first whitespace-separated token of {@code type}
     * equals one of the senses the taxonomy lists for the entity's name.
     *
     * @param entity entity to look up
     * @param type   type label chosen for the entity
     * @return true if a sense of the entity equals the label's first token
     */
    private static boolean isSame(Entity entity, String type) {
        Collection<String> senses = _taxonomy.getSenses(entity.getName());
        if (senses == null || senses.isEmpty()) {
            return false;
        }
        // Loop invariant, so split the label only once.
        String typeId = type.split("\\s+")[0];
        for (String sense : senses) {
            if (typeId.equals(sense)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Finds, among the senses of an entity, the one whose types are most
     * similar to the context and returns that sense together with all of its
     * (transitive, non-ignored) types.
     *
     * <p>Numbers are looked up as {@code "integer"} unless they are four
     * characters long and have senses of their own (NOTE(review): presumably
     * so that years keep their senses - confirm). If nothing is found for a
     * name ending in "s", the name without that "s" is tried.
     *
     * @param entity  normalised entity name
     * @param context context words of the entity
     * @return the types of the best scoring sense, or {@code null} if the
     *         entity has no senses or no sense scored above negative infinity
     */
    private static Collection<String> getTypesForBestSense(String entity, String context) {
        Collection<String> senses = _taxonomy.getSenses(entity);
        if (MatchSentences.isNumber(entity) && (senses == null || entity.length() != 4)) {
            senses = _taxonomy.getSenses("integer");
        }
        if (senses == null && entity.endsWith("s")) {
            senses = _taxonomy.getSenses(entity.substring(0, entity.length() - 1));
        }
        if (senses == null) {
            return null;
        }
        double bestScore = Double.NEGATIVE_INFINITY;
        Set<String> bestTypes = null;
        for (String sense : senses) {
            Set<String> types = MatchSentences.collectTypes(sense);
            if (types.isEmpty()) {
                continue;
            }
            double score = MatchSentences.getSenseScore(types, context);
            // Strict comparison: ties keep the earlier sense and NaN never wins.
            if (score > bestScore) {
                bestScore = score;
                bestTypes = types;
            }
        }
        return bestTypes;
    }

    /**
     * Collects a sense plus every type reachable from it through
     * {@code Taxonomy.getTypes}, skipping ignored types. The sense itself is
     * always included.
     */
    private static Set<String> collectTypes(String sense) {
        Set<String> types = new HashSet<String>();
        types.add(sense);
        Collection<String> directTypes = _taxonomy.getTypes(sense);
        if (directTypes != null) {
            for (String type : directTypes) {
                if (!MatchSentences.ignore(type)) {
                    types.add(type);
                }
            }
        }
        // Expand until every collected type has been looked up. The sets are
        // built exactly as before so that their iteration order, which feeds
        // tie-breaking in chooseBestType, does not change.
        Set<String> done = new HashSet<String>();
        while (types.size() != done.size()) {
            Set<String> newTypes = new HashSet<String>(types);
            for (String type : types) {
                if (done.contains(type)) {
                    continue;
                }
                Collection<String> typeTypes = _taxonomy.getTypes(type);
                if (typeTypes != null) {
                    for (String typeType : typeTypes) {
                        if (!MatchSentences.ignore(typeType)) {
                            newTypes.add(typeType);
                        }
                    }
                }
                done.add(type);
            }
            types = newTypes;
        }
        return types;
    }

    /**
     * Scores a set of types against a context as the cosine similarity
     * between the words of the types and the context.
     */
    private static double getSenseScore(Collection<String> types, String context) {
        String typesSentence = MatchSentences.buildTypesSentence(types);
        return MatchSentences.getVectors().cosineSimilarity(typesSentence, context);
    }

    /**
     * Tells whether an entity name is a number, optionally followed by
     * {@code _thousand}, {@code _million}, {@code _billion} or
     * {@code _trillion}.
     */
    private static boolean isNumber(String entity) {
        return NUMBER_PATTERN.matcher(entity).matches();
    }

    /**
     * Returns the word vectors, loading them on first use. The file is taken
     * from the {@code paraphraseMining.vectorsFile} system property and falls
     * back to the original hard-coded location. Not thread-safe.
     */
    private static Vectors getVectors() {
        if (_vectors == null) {
            Set<String> corpusTerms = MatchSentences.getCorpusTerms(_sentences, _taxonomy);
            String vectorsFile = System.getProperty(VECTORS_FILE_PROPERTY, DEFAULT_VECTORS_FILE);
            _vectors = Vectors.readFromFile(vectorsFile, corpusTerms, 3.0);
            _vectors.setUseCache(false);
        }
        return _vectors;
    }

    /**
     * Collects the terms passed to the vectors loader: the whitespace
     * separated tokens of every normalised sentence, plus the
     * underscore-separated parts of every taxonomy word that consists only of
     * lower-case letters, '-' and '_'.
     */
    private static Set<String> getCorpusTerms(List<String> sentences, Taxonomy taxonomy) {
        Set<String> terms = new HashSet<String>();
        for (String sentence : sentences) {
            String normalized = ParaphraseUtils.normalizeText(sentence);
            Collections.addAll(terms, normalized.split("\\s+"));
        }
        for (String word : taxonomy.getAllWords()) {
            if (!word.matches("[a-z\\-\\_]+")) {
                continue;
            }
            Collections.addAll(terms, word.split("_"));
        }
        return terms;
    }

    /** Returns the normalised names of all entities marked up in the sentences. */
    private static Set<String> getCorpusEntities(List<String> sentences) {
        Set<String> names = new HashSet<String>();
        for (String sentence : sentences) {
            for (Entity entity : MatchSentences.getEntities(sentence)) {
                names.add(entity.getName());
            }
        }
        return names;
    }

    /** Joins the type sentences of all given types with single spaces. */
    private static String buildTypesSentence(Collection<String> types) {
        StringBuilder typesSentence = new StringBuilder();
        for (String type : types) {
            typesSentence.append(MatchSentences.makeTypeSentence(type)).append(' ');
        }
        return typesSentence.toString().trim();
    }

    /** Tells whether a type identifier is on the fixed list of ignored types. */
    private static boolean ignore(String type) {
        return IGNORED_TYPES.contains(type);
    }

    /**
     * Picks the type whose words are most similar to the context.
     *
     * @param typesForEntity candidate types
     * @param context        context words of the entity
     * @return the only candidate if there is exactly one; otherwise the first
     *         candidate with the strictly highest similarity, or {@code null}
     *         if no candidate scores above negative infinity
     */
    private static String chooseBestType(Collection<String> typesForEntity, String context) {
        if (typesForEntity.size() == 1) {
            return typesForEntity.iterator().next();
        }
        double bestScore = Double.NEGATIVE_INFINITY;
        String bestType = null;
        for (String type : typesForEntity) {
            double score = MatchSentences.getVectors().cosineSimilarity(context, MatchSentences.makeTypeSentence(type));
            if (score > bestScore) {
                bestScore = score;
                bestType = type;
            }
        }
        return bestType;
    }

    /**
     * Returns the first word the taxonomy lists for a type, cut at the first
     * {@code ":::"} and lower-cased.
     */
    private static String makeTypeWord(String type) {
        return _taxonomy.getWords(type).iterator().next().split(":::")[0].toLowerCase(Locale.ROOT);
    }

    /**
     * Returns all words the taxonomy lists for a type, each cut at the first
     * {@code ":::"} and lower-cased, joined with single spaces.
     */
    private static String makeTypeSentence(String type) {
        StringBuilder sb = new StringBuilder();
        for (String word : _taxonomy.getWords(type)) {
            sb.append(word.split(":::")[0].toLowerCase(Locale.ROOT)).append(' ');
        }
        return sb.toString().trim();
    }

    /**
     * Builds the context of the {@code entityIndex}-th entity of a sentence:
     * up to five tokens of the normalised text before its {@code [[} followed
     * by up to five tokens of the normalised text from its {@code ]]} on.
     *
     * @param entityIndex zero-based index of the entity among the {@code [[} markers
     * @param sentence    sentence with entity markup
     * @return the context tokens joined with single spaces
     */
    private static String getEntityContext(int entityIndex, String sentence) {
        int startIndex = sentence.indexOf("[[");
        for (int i = 0; i < entityIndex; i++) {
            startIndex = sentence.indexOf("[[", startIndex + 2);
        }
        int endIndex = sentence.indexOf("]]", startIndex);
        String[] preTokens = ParaphraseUtils.normalizeText(sentence.substring(0, startIndex)).split("\\s+");
        String[] postTokens = ParaphraseUtils.normalizeText(sentence.substring(endIndex)).split("\\s+");
        // Last five tokens before the entity, first five after it.
        preTokens = Arrays.copyOfRange(preTokens, Math.max(0, preTokens.length - 5), preTokens.length);
        postTokens = Arrays.copyOfRange(postTokens, 0, Math.min(postTokens.length, 5));
        StringBuilder sb = new StringBuilder();
        for (String token : preTokens) {
            sb.append(token).append(' ');
        }
        for (String token : postTokens) {
            sb.append(token).append(' ');
        }
        return sb.toString().trim();
    }

    /**
     * Parses the {@code [[name]]} and {@code [[type|name]]} markers of a
     * sentence, in order of appearance. Names are normalised with
     * {@link #normalize(String)}. The closing {@code ]]} is searched after
     * the opening {@code [[}, consistently with the other methods that walk
     * the markup.
     *
     * @param sentence sentence with entity markup
     * @return one entity per marker; the type is {@code null} for {@code [[name]]}
     * @throws IllegalArgumentException if a marker is not closed, or does not
     *         consist of one or two non-trailing-empty {@code |}-separated parts
     */
    private static List<Entity> getEntities(String sentence) {
        List<Entity> entities = new ArrayList<Entity>();
        int open = sentence.indexOf("[[");
        while (open != -1) {
            int close = sentence.indexOf("]]", open + 2);
            if (close == -1) {
                throw new IllegalArgumentException("unterminated entity markup ('[[' without ']]') in sentence: " + sentence);
            }
            String text = sentence.substring(open + 2, close);
            String[] tokens = text.split("\\|");
            if (tokens.length == 0 || tokens.length > 2) {
                throw new IllegalArgumentException("malformed entity markup '[[" + text + "]]' in sentence: " + sentence);
            }
            boolean typed = tokens.length == 2;
            String name = MatchSentences.normalize(typed ? tokens[1] : tokens[0]);
            String type = typed ? tokens[0] : null;
            entities.add(new Entity(name, type));
            open = sentence.indexOf("[[", close + 2);
        }
        return entities;
    }

    /** Replaces every run of whitespace by a single underscore and lower-cases the result. */
    private static String normalize(String string) {
        return string.replaceAll("\\s+", "_").toLowerCase(Locale.ROOT);
    }

    /** An entity mention: its normalised name and the type given in the markup, if any. */
    private static final class Entity {
        private final String _name;
        private final String _type;

        /**
         * @param name normalised entity name
         * @param type type given in the markup, or {@code null} if none
         */
        public Entity(String name, String type) {
            this._name = name;
            this._type = type;
        }

        /** Returns the normalised entity name. */
        public String getName() {
            return this._name;
        }

        /** Returns the type given in the markup, or {@code null} if none. */
        public String getType() {
            return this._type;
        }
    }
}

