/*
 * Decompiled with CFR 0.152.
 */
package edu.columbia.ob.gen.app;

import edu.columbia.ob.gen.env.PreGenEnv;
import edu.columbia.ob.gen.util.PosTagging;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;
import java.text.NumberFormat;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.TreeMap;
import java.util.regex.Pattern;
import ob.util.Counts;
import ob.util.Pair;
import ob.util.Utils;

/**
 * Command-line tool that reads a domain corpus line by line ("paragraphs"), splits each
 * paragraph into sentences and, for every pair of adjacent sentences, accumulates three
 * kinds of co-occurrence statistics:
 * <ul>
 *   <li>word pairs (a word of the previous sentence, a word of the current sentence),</li>
 *   <li>POS-bigram pairs (a tag bigram of the previous sentence, one of the current sentence),</li>
 *   <li>sentence-length pairs (whitespace token count of previous vs. current sentence).</li>
 * </ul>
 * The accumulated counts are then written to five files located in the directory returned by
 * {@code PreGenEnv.getSubjectDomainCorpusDir(domainName)}.
 */
public class RealizationModelsCreator {
    private static final NumberFormat NF = NumberFormat.getNumberInstance();

    /** Anything that is not an ASCII letter, digit or hyphen separates tokens. */
    private static final Pattern TOKEN_SEPARATOR = Pattern.compile("[^A-Za-z0-9\\-]");

    /** A token made only of ASCII letters and hyphens (i.e. containing no digits). */
    private static final Pattern LETTERS_AND_HYPHENS = Pattern.compile("[A-Za-z\\-]+");

    /** Separator used when measuring sentence length in tokens. */
    private static final Pattern WHITESPACE = Pattern.compile("\\s+");

    /** A full stop followed by whitespace ends a sentence. */
    private static final Pattern SENTENCE_BOUNDARY = Pattern.compile("\\.\\s+");

    /**
     * Entry point.
     *
     * @param args exactly one element: the domain name whose corpus is processed
     */
    public static void main(String[] args) {
        if (args.length != 1) {
            System.out.println("usage: RealizationModelsCreator <domain_name>");
            // NOTE(review): a usage error exits with status 0; kept as-is for compatibility.
            System.exit(0);
        }
        String domainName = args[0];
        String domainCorpus = domainFile(domainName, "corpus");
        Counts<Pair<String>> wordPairCount = new Counts<Pair<String>>();
        Counts<Pair<String>> posNgramPairCount = new Counts<Pair<String>>();
        Map<Integer, Counts<Integer>> lengthPairCounts = new TreeMap<Integer, Counts<Integer>>();
        int c = 0;
        for (String paragraph : Utils.readLinesDynamically(domainCorpus)) {
            if (++c % 1000 == 0) {
                System.out.println("went through " + c + " paragraphs   ---   " + Utils.usedMemory());
            }
            // Lines starting with "####" or "=" are not treated as paragraph text.
            if (paragraph.startsWith("####") || paragraph.startsWith("=")) {
                continue;
            }
            String previousSentence = null;
            for (String sentence : RealizationModelsCreator.splitToSentences(paragraph)) {
                if (previousSentence != null) {
                    RealizationModelsCreator.updateWordPairCount(sentence, previousSentence, wordPairCount);
                    RealizationModelsCreator.updatePosNgramPairCount(sentence, previousSentence, posNgramPairCount);
                    RealizationModelsCreator.updateLengthPairCounts(sentence, previousSentence, lengthPairCounts);
                }
                previousSentence = sentence;
            }
        }
        String wordModelFile = domainFile(domainName, "realization_model_word");
        String wordNormalizerFile = domainFile(domainName, "realization_model_word_normalizer");
        String posNgramModelFile = domainFile(domainName, "realization_model_pos_ngram");
        String posNgramNormalizerFile = domainFile(domainName, "realization_model_pos_ngram_normalizer");
        String lengthModelFile = domainFile(domainName, "realization_model_length");
        RealizationModelsCreator.outputNgramModel(wordPairCount, wordModelFile);
        RealizationModelsCreator.outputNormalizers(wordPairCount, wordNormalizerFile);
        RealizationModelsCreator.outputNgramModel(posNgramPairCount, posNgramModelFile);
        RealizationModelsCreator.outputNormalizers(posNgramPairCount, posNgramNormalizerFile);
        RealizationModelsCreator.outputLengthModel(lengthPairCounts, lengthModelFile);
    }

    /** Returns the absolute path of {@code name} inside the corpus directory of {@code domainName}. */
    private static String domainFile(String domainName, String name) {
        return new File(PreGenEnv.getSubjectDomainCorpusDir(domainName), name).getAbsolutePath();
    }

    /**
     * Throws if the writer recorded an I/O error. {@link PrintWriter} never throws from
     * {@code println}, so without this check a failed write would go unnoticed.
     */
    private static void failOnWriteError(PrintWriter pw, String file) throws IOException {
        if (pw.checkError()) {
            throw new IOException("error while writing " + file);
        }
    }

    /**
     * Writes, for every distinct second element of the counted pairs, its share of the total
     * pair count as {@code element<TAB>score}, most frequent first.
     *
     * @param pairCount pair counts to marginalise over their second element
     * @param file      path of the output file (overwritten)
     * @throws RuntimeException wrapping the {@link IOException} if the file cannot be written
     */
    private static void outputNormalizers(Counts<Pair<String>> pairCount, String file) {
        // long: the grand total of pair occurrences may exceed Integer.MAX_VALUE on big corpora.
        long total = 0L;
        Counts<String> counts = new Counts<String>();
        for (Pair<String> pair : pairCount) {
            String word = pair.getSecond();
            int occurrences = pairCount.getCount(pair).intValue();
            // Counts is only used through add(item) in this file, so add once per occurrence.
            for (int i = 0; i < occurrences; ++i) {
                counts.add(word);
            }
            total += occurrences;
        }
        List<String> sortedWords = RealizationModelsCreator.getSortedWords(counts);
        try {
            PrintWriter pw = new PrintWriter(new FileWriter(file));
            try {
                for (String word : sortedWords) {
                    double score = (double)counts.getCount(word).intValue() / (double)total;
                    pw.println(word + "\t" + score);
                }
                failOnWriteError(pw, file);
            }
            finally {
                pw.close();
            }
        }
        catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Writes one line per observed previous-sentence length:
     * {@code length<TAB>average length of the following sentence}, the average being rendered
     * with the class-wide {@link NumberFormat}. Lines follow the iteration order of the map.
     *
     * @param lengthPairCounts for each previous-sentence length, the counts of next-sentence lengths
     * @param file             path of the output file (overwritten)
     * @throws RuntimeException wrapping the {@link IOException} if the file cannot be written
     */
    private static void outputLengthModel(Map<Integer, Counts<Integer>> lengthPairCounts, String file) {
        try {
            PrintWriter pw = new PrintWriter(new FileWriter(file));
            try {
                for (Map.Entry<Integer, Counts<Integer>> entry : lengthPairCounts.entrySet()) {
                    Counts<Integer> counts = entry.getValue();
                    long totalCounts = 0L;
                    double weightedSum = 0.0;
                    for (Integer nextLength : counts) {
                        int count = counts.getCount(nextLength).intValue();
                        totalCounts += count;
                        // Multiply in double so length * count cannot overflow int.
                        weightedSum += (double)nextLength.intValue() * (double)count;
                    }
                    double averageNextLength = weightedSum / (double)totalCounts;
                    pw.println(entry.getKey() + "\t" + NF.format(averageNextLength));
                }
                failOnWriteError(pw, file);
            }
            finally {
                pw.close();
            }
        }
        catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Writes every pair seen more than once as {@code first<TAB>second<TAB>score}, most frequent
     * first. The score is the pair's count divided by the total count of all pairs, including
     * the pairs seen only once that are not written.
     *
     * @param pairCount pair counts to output
     * @param file      path of the output file (overwritten)
     * @throws RuntimeException wrapping the {@link IOException} if the file cannot be written
     */
    private static void outputNgramModel(Counts<Pair<String>> pairCount, String file) {
        // long: the grand total of pair occurrences may exceed Integer.MAX_VALUE on big corpora.
        long totalCount = 0L;
        for (Pair<String> pair : pairCount) {
            totalCount += pairCount.getCount(pair).intValue();
        }
        List<Pair<String>> sortedPairs = RealizationModelsCreator.getSortedPairs(pairCount);
        try {
            PrintWriter pw = new PrintWriter(new FileWriter(file));
            try {
                for (Pair<String> pair : sortedPairs) {
                    int count = pairCount.getCount(pair).intValue();
                    if (count <= 1) {
                        continue;
                    }
                    double score = (double)count / (double)totalCount;
                    pw.println(pair.getFirst() + "\t" + pair.getSecond() + "\t" + score);
                }
                failOnWriteError(pw, file);
            }
            finally {
                pw.close();
            }
        }
        catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    /** Three-way comparison that puts the larger count first, without subtracting. */
    private static int compareDescending(int count1, int count2) {
        if (count1 > count2) {
            return -1;
        }
        return count1 < count2 ? 1 : 0;
    }

    /** Returns the counted pairs ordered by descending count (ties keep iteration order). */
    private static List<Pair<String>> getSortedPairs(final Counts<Pair<String>> counts) {
        List<Pair<String>> sortedNgrams = new ArrayList<Pair<String>>(counts);
        Collections.sort(sortedNgrams, new Comparator<Pair<String>>(){

            @Override
            public int compare(Pair<String> o1, Pair<String> o2) {
                return compareDescending(counts.getCount(o1).intValue(), counts.getCount(o2).intValue());
            }
        });
        return sortedNgrams;
    }

    /** Returns the counted words ordered by descending count (ties keep iteration order). */
    private static List<String> getSortedWords(final Counts<String> counts) {
        List<String> sortedWords = new ArrayList<String>(counts);
        Collections.sort(sortedWords, new Comparator<String>(){

            @Override
            public int compare(String o1, String o2) {
                return compareDescending(counts.getCount(o1).intValue(), counts.getCount(o2).intValue());
            }
        });
        return sortedWords;
    }

    /**
     * Records that a sentence of {@code previousSentence}'s length was followed by a sentence of
     * {@code sentence}'s length. Length is the number of pieces produced by splitting on
     * whitespace runs.
     */
    private static void updateLengthPairCounts(String sentence, String previousSentence, Map<Integer, Counts<Integer>> lengthPairCounts) {
        // NOTE(review): a sentence with leading whitespace yields an empty first piece and is
        // therefore counted one token longer; left unchanged to keep the model definition.
        int previousLength = WHITESPACE.split(previousSentence).length;
        int currentLength = WHITESPACE.split(sentence).length;
        Counts<Integer> counts = lengthPairCounts.get(previousLength);
        if (counts == null) {
            counts = new Counts<Integer>();
            lengthPairCounts.put(previousLength, counts);
        }
        counts.add(currentLength);
    }

    /**
     * Counts every combination of a POS-tag bigram from {@code previousSentence} with a POS-tag
     * bigram from {@code sentence}. Bigrams are rendered as {@code tag1_tag2}.
     */
    private static void updatePosNgramPairCount(String sentence, String previousSentence, Counts<Pair<String>> posNgramPairCount) {
        // Build each sentence's bigrams once instead of once per bigram of the other sentence.
        List<String> previousBigrams = posBigrams(previousSentence);
        List<String> currentBigrams = posBigrams(sentence);
        for (String pos1 : previousBigrams) {
            for (String pos2 : currentBigrams) {
                posNgramPairCount.add(new Pair<String>(pos1, pos2));
            }
        }
    }

    /**
     * Tokenises {@code sentence}, tags the tokens with {@code PosTagging.getPos} and returns the
     * consecutive tag bigrams joined by an underscore, in sentence order.
     */
    private static List<String> posBigrams(String sentence) {
        // NOTE(review): adjacent separators produce empty tokens which are passed to the tagger
        // as-is, exactly like before; confirm PosTagging handles them as intended.
        String[] tags = PosTagging.getPos(TOKEN_SEPARATOR.split(sentence));
        List<String> bigrams = new ArrayList<String>();
        for (int i = 1; i < tags.length; ++i) {
            bigrams.add(tags[i - 1] + "_" + tags[i]);
        }
        return bigrams;
    }

    /**
     * Counts every combination of an eligible word from {@code previousSentence} with an eligible
     * word from {@code sentence}; see {@link #eligibleWords(String)} for what is eligible.
     */
    private static void updateWordPairCount(String sentence, String previousSentence, Counts<Pair<String>> wordPairCount) {
        // Filter each sentence once; the old code re-ran the regex on the second sentence's
        // words for every word of the first sentence.
        List<String> previousWords = eligibleWords(previousSentence);
        List<String> currentWords = eligibleWords(sentence);
        for (String word1 : previousWords) {
            for (String word2 : currentWords) {
                wordPairCount.add(new Pair<String>(word1, word2));
            }
        }
    }

    /**
     * Lower-cases and tokenises {@code sentence}, keeping only tokens that are at least two
     * characters long and consist solely of ASCII letters and hyphens.
     */
    private static List<String> eligibleWords(String sentence) {
        // Locale.ROOT: with a locale-sensitive lower-casing (e.g. Turkish) "I" becomes a
        // non-ASCII letter, which the ASCII-only separator pattern would then split on.
        String[] tokens = TOKEN_SEPARATOR.split(sentence.toLowerCase(Locale.ROOT));
        List<String> words = new ArrayList<String>(tokens.length);
        for (String token : tokens) {
            if (token.length() >= 2 && LETTERS_AND_HYPHENS.matcher(token).matches()) {
                words.add(token);
            }
        }
        return words;
    }

    /** Splits a paragraph at every full stop that is followed by whitespace. */
    private static List<String> splitToSentences(String paragraph) {
        return Utils.list(SENTENCE_BOUNDARY.split(paragraph));
    }
}

