/*
 * Decompiled with CFR 0.152.
 */
package edu.columbia.ob.gen.paraphraseMining;

import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import ob.util.StopWatch;
import ob.util.Utils;

/**
 * Sparse term vectors ({@code term -> feature -> score}) together with helpers
 * for composing sentence-level vectors and comparing them.
 *
 * <p>A sentence vector is built by multiplying, feature by feature, the
 * smoothed values ({@code score + missingValue}, or just {@code missingValue}
 * when a token lacks the feature) of every token that has a vector, and then
 * taking the n-th root of each product, where n is the number of words in the
 * sentence.
 *
 * <p>Combined vectors are cached per sentence string by default. The maps
 * returned by the lookup methods are the internally stored instances, so
 * callers should treat them as read-only.
 */
public class Vectors {
    private static final double DEFAULT_MINIMUM_SCORE = 1.0;
    private static final double DEFAULT_MISSING_VALUE = 1.0;

    private boolean _useCache = true;
    private double _missingValue = DEFAULT_MISSING_VALUE;
    private final Map<String, Map<String, Double>> _map = new HashMap<String, Map<String, Double>>();
    private Map<String, Map<String, Double>> _combinedCache = new HashMap<String, Map<String, Double>>();

    /**
     * Reads every vector from {@code vectorFile} using the default minimum score.
     *
     * @param vectorFile path of the vector file
     * @return the loaded vectors
     */
    public static Vectors readFromFile(String vectorFile) {
        return Vectors.readFromFile(vectorFile, null);
    }

    /**
     * Reads the vectors of the given terms from {@code vectorFile} using the
     * default minimum score.
     *
     * @param vectorFile    path of the vector file
     * @param termsInCorpus terms to keep, or {@code null} to keep every term
     * @return the loaded vectors
     */
    public static Vectors readFromFile(String vectorFile, Set<String> termsInCorpus) {
        return Vectors.readFromFile(vectorFile, termsInCorpus, DEFAULT_MINIMUM_SCORE);
    }

    /**
     * Reads vectors from a file with one term per line in the form
     * {@code term<TAB>feature:score<TAB>feature:score...}.
     *
     * <p>Only features whose score is strictly greater than the threshold are
     * kept, and terms left without any feature are dropped. Progress is printed
     * to standard output every 10000 lines. An entry without a {@code ':'} or
     * with an unparsable score makes the load fail with a runtime exception.
     *
     * @param vectorFile    path of the vector file
     * @param termsInCorpus terms to keep, or {@code null} to keep every term
     * @param minimumScore  exclusive lower bound for feature scores;
     *                      {@code null} selects the default threshold
     * @return the loaded vectors
     */
    public static Vectors readFromFile(String vectorFile, Set<String> termsInCorpus, Double minimumScore) {
        // A null threshold used to fail with an NPE on unboxing inside the loop.
        double threshold = minimumScore != null ? minimumScore : DEFAULT_MINIMUM_SCORE;
        if (termsInCorpus != null) {
            System.out.println("terms in corpus: " + termsInCorpus.size());
        }
        Vectors vectors = new Vectors();
        int linesRead = 0;
        int vectorsKept = 0;
        StopWatch sw = new StopWatch();
        for (String line : Utils.readLinesDynamically(vectorFile)) {
            if (linesRead % 10000 == 0) {
                System.out.println("read " + vectorsKept + " out of " + linesRead + " vectors  --  " + Utils.usedMemory() + "  --  " + sw.getTimeElapsedPretty());
            }
            ++linesRead;
            String[] tokens = line.split("\\t");
            String term = tokens[0];
            if (termsInCorpus != null && !termsInCorpus.contains(term)) {
                continue;
            }
            // Counted before the empty-vector check, so this is "terms accepted", not "terms stored".
            ++vectorsKept;
            Map<String, Double> vector = new HashMap<String, Double>();
            for (int i = 1; i < tokens.length; ++i) {
                String[] featureAndScore = tokens[i].split(":");
                double score = Double.parseDouble(featureAndScore[1]);
                if (score > threshold) {
                    vector.put(featureAndScore[0], score);
                }
            }
            if (vector.isEmpty()) {
                continue;
            }
            vectors._map.put(term, vector);
        }
        return vectors;
    }

    /**
     * Computes the cosine similarity between the combined vectors of two sentences.
     *
     * <p>When either sentence has no combined vector a message is printed to
     * standard output and {@code 0.0} is returned.
     *
     * @param sentence1 first sentence
     * @param sentence2 second sentence
     * @return the cosine similarity, or {@code 0.0} if a sentence has no vector
     */
    public double cosineSimilarity(String sentence1, String sentence2) {
        Map<String, Double> vector1 = this.getCombinedVector(sentence1);
        Map<String, Double> vector2 = this.getCombinedVector(sentence2);
        if (vector1 == null) {
            System.out.println("null vector for sentence: " + sentence1);
            return 0.0;
        }
        if (vector2 == null) {
            System.out.println("null vector for sentence: " + sentence2);
            return 0.0;
        }
        return Vectors.cosineSimilarity(vector1, vector2);
    }

    /**
     * Computes the cosine similarity of two sparse vectors.
     *
     * @param vector1 first vector (feature -> value)
     * @param vector2 second vector (feature -> value)
     * @return dot product divided by the product of the Euclidean norms, or
     *         {@code 0.0} when that product is zero (empty or all-zero vector)
     */
    public static double cosineSimilarity(Map<String, Double> vector1, Map<String, Double> vector2) {
        double denominator = Vectors.euclideanNorm(vector1) * Vectors.euclideanNorm(vector2);
        if (denominator == 0.0) {
            // 0/0 would yield NaN; a vector with no magnitude is similar to nothing.
            return 0.0;
        }
        return Vectors.dotProduct(vector1, vector2) / denominator;
    }

    /** Returns the Euclidean (L2) norm of the vector's values. */
    private static double euclideanNorm(Map<String, Double> vector) {
        double sumOfSquares = 0.0;
        for (Double value : vector.values()) {
            sumOfSquares += value * value;
        }
        return Math.sqrt(sumOfSquares);
    }

    /** Returns the dot product over the features of {@code vector1} that {@code vector2} also has. */
    private static double dotProduct(Map<String, Double> vector1, Map<String, Double> vector2) {
        double sum = 0.0;
        for (Map.Entry<String, Double> entry : vector1.entrySet()) {
            Double value2 = vector2.get(entry.getKey());
            if (value2 == null) {
                continue;
            }
            sum += value2 * entry.getValue();
        }
        return sum;
    }

    /**
     * Returns the combined vector of a sentence, consulting and filling the
     * cache when caching is enabled.
     *
     * @param sentence whitespace-separated sentence
     * @return the combined vector, or {@code null} if no token of the sentence
     *         has a vector (that {@code null} result is cached as well)
     */
    public Map<String, Double> getCombinedVector(String sentence) {
        if (!this._useCache) {
            return this.makeCombinedVector(sentence);
        }
        // containsKey rather than a null check: null results are cached too.
        if (this._combinedCache.containsKey(sentence)) {
            return this._combinedCache.get(sentence);
        }
        Map<String, Double> vector = this.makeCombinedVector(sentence);
        this._combinedCache.put(sentence, vector);
        return vector;
    }

    /**
     * Builds the combined vector of a sentence: for every feature seen in any
     * token vector, the product of the smoothed per-token values raised to the
     * power {@code 1 / wordCount}.
     *
     * @return the combined vector, or {@code null} if no token has a vector
     */
    private Map<String, Double> makeCombinedVector(String sentence) {
        Collection<Map<String, Double>> tokenVectors = this.getTokenVectors(sentence);
        if (tokenVectors.isEmpty()) {
            return null;
        }
        double exponent = 1.0 / (double) Vectors.countWords(sentence);
        Map<String, Double> combined = new HashMap<String, Double>();
        for (String feature : this.getAllFeatures(tokenVectors)) {
            double product = 1.0;
            for (Map<String, Double> tokenVector : tokenVectors) {
                product *= this.getValue(tokenVector, feature);
            }
            combined.put(feature, Math.pow(product, exponent));
        }
        return combined;
    }

    /**
     * Counts the words of a sentence, treating runs of whitespace and
     * underscores as separators.
     *
     * <p>Empty pieces (from leading or repeated separators) are not counted,
     * and the result is never below 1 so that it can safely be used as a
     * divisor.
     */
    private static int countWords(String sentence) {
        int count = 0;
        for (String word : sentence.split("[\\s_]+")) {
            if (word.length() > 0) {
                ++count;
            }
        }
        return Math.max(1, count);
    }

    /** Returns the smoothed value of a feature: its score plus the missing value, or the missing value alone if absent. */
    private double getValue(Map<String, Double> vector, String feature) {
        Double value = vector.get(feature);
        return value == null ? this._missingValue : value + this._missingValue;
    }

    /** Returns the union of the feature names of all given vectors. */
    private Set<String> getAllFeatures(Collection<Map<String, Double>> vectors) {
        Set<String> features = new HashSet<String>();
        for (Map<String, Double> vector : vectors) {
            features.addAll(vector.keySet());
        }
        return features;
    }

    /** Returns the vectors of the whitespace-separated tokens of the sentence, skipping tokens without a vector. */
    private Collection<Map<String, Double>> getTokenVectors(String sentence) {
        Collection<Map<String, Double>> tokenVectors = new ArrayList<Map<String, Double>>();
        for (String token : sentence.split("\\s+")) {
            Map<String, Double> tokenVector = this.getVector(token);
            if (tokenVector != null) {
                tokenVectors.add(tokenVector);
            }
        }
        return tokenVectors;
    }

    /**
     * Looks up the vector of a term.
     *
     * <p>If the term has no stored vector and contains {@code '_'}, the
     * combined vector of the term with underscores replaced by spaces is used;
     * if that yields nothing and the term contains {@code '-'}, the same is
     * tried with hyphens replaced by spaces.
     *
     * @param term the term to look up
     * @return the term's vector, or {@code null} if none can be found
     */
    public Map<String, Double> getVector(String term) {
        Map<String, Double> vector = this._map.get(term);
        if (vector == null && term.contains("_")) {
            vector = this.getCombinedVector(term.replace("_", " "));
        }
        if (vector == null && term.contains("-")) {
            vector = this.getCombinedVector(term.replace("-", " "));
        }
        return vector;
    }

    /**
     * Computes the Euclidean distance between the combined vectors of two sentences.
     *
     * @param sentence1 first sentence
     * @param sentence2 second sentence
     * @return the distance as defined by {@link #euclideanDistance(Map, Map)}
     * @throws NullPointerException if either sentence has no combined vector
     */
    public Double euclideanDistance(String sentence1, String sentence2) {
        Map<String, Double> vector1 = Vectors.requireVector(this.getCombinedVector(sentence1), sentence1);
        Map<String, Double> vector2 = Vectors.requireVector(this.getCombinedVector(sentence2), sentence2);
        return Vectors.euclideanDistance(vector1, vector2);
    }

    /**
     * Computes a Euclidean distance restricted to shared, non-zero features:
     * a feature of {@code vector1} contributes only if its value is non-zero
     * in both vectors (a feature missing from {@code vector2} counts as zero).
     *
     * @param vector1 first vector (feature -> value)
     * @param vector2 second vector (feature -> value)
     * @return the square root of the summed squared differences
     */
    public static Double euclideanDistance(Map<String, Double> vector1, Map<String, Double> vector2) {
        double sumOfSquares = 0.0;
        for (Map.Entry<String, Double> entry : vector1.entrySet()) {
            Double value2 = vector2.get(entry.getKey());
            if (value2 == null || value2 == 0.0) {
                continue;
            }
            double value1 = entry.getValue();
            if (value1 == 0.0) {
                continue;
            }
            double difference = value1 - value2;
            sumOfSquares += difference * difference;
        }
        return Math.sqrt(sumOfSquares);
    }

    /**
     * Computes the Euclidean distance between the combined vector of a
     * sentence and the given vector.
     *
     * @param vector   vector to compare against
     * @param sentence sentence whose combined vector is used
     * @return the distance as defined by {@link #euclideanDistance(Map, Map)}
     * @throws NullPointerException if the sentence has no combined vector
     */
    public double euclideanDistance(Map<String, Double> vector, String sentence) {
        Map<String, Double> sentenceVector = Vectors.requireVector(this.getCombinedVector(sentence), sentence);
        return Vectors.euclideanDistance(sentenceVector, vector);
    }

    /** Returns {@code vector}, or throws a descriptive NullPointerException if the sentence produced none. */
    private static Map<String, Double> requireVector(Map<String, Double> vector, String sentence) {
        if (vector == null) {
            throw new NullPointerException("no vector available for sentence: " + sentence);
        }
        return vector;
    }

    /**
     * Enables or disables caching of combined vectors.
     *
     * @param useCache {@code true} to cache combined vectors per sentence
     */
    public void setUseCache(boolean useCache) {
        this._useCache = useCache;
    }

    /**
     * Sets the smoothing value added to every feature score (and used on its
     * own for absent features) when combining token vectors. Combined vectors
     * already in the cache are not recomputed.
     *
     * @param missingValue the new smoothing value
     */
    public void setMissingValue(double missingValue) {
        this._missingValue = missingValue;
    }

    /** Discards all cached combined vectors. */
    public void clearCache() {
        this._combinedCache = new HashMap<String, Map<String, Double>>();
    }
}

