/*
 * Decompiled with CFR 0.152.
 */
package edu.columbia.ob.gen.gems.rdfApps;

import edu.columbia.ob.gen.env.PreGenEnv;
import edu.columbia.ob.gen.gems.rdfApps.RdfUtils;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.PrintWriter;
import java.net.URLDecoder;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import ob.util.collections.MultiMap3;

/**
 * Command-line tool that assembles a CSV file of previously generated texts
 * (judging by the class name, as input for a CrowdFlower evaluation job).
 *
 * <p>For each selected instance of a domain, one CSV row is produced per
 * system variant. Each variant's text is read from a directory named
 * {@code <domainDir><systemSuffix>} under {@link #PRE_GEN_ROOT}. The rows are
 * shuffled, prefixed with a header line, echoed to standard output and written
 * to {@code working/cf_eval1_<app>2.csv}.
 */
public class BuildDataForCrowdFlowerEval1 {
    /** Maximum number of instances selected per domain. */
    private static final int INSTANCES_PER_DOMAIN = 100;

    /** Root directory holding one sub-directory of generated texts per domain/system variant. */
    private static final String PRE_GEN_ROOT = "C:/dev/working/PreGen/";

    /** Directory suffix of the system variant that is reported as "baseline". */
    private static final String BASELINE_SYSTEM = "-ndm-ntl-npr-nhg";

    /** Application name written to every row and embedded in the output file name. */
    private static final String APP = "cdesc";

    /**
     * Builds the rows for the "video games" domain, shuffles them, and writes them
     * (after a header line) to standard output and to the output CSV file.
     *
     * @param args unused
     * @throws Exception if the output file cannot be created or written
     */
    public static void main(String[] args) throws Exception {
        List<String> rows = new ArrayList<String>();
        addToRows(rows, APP, "video games", "video_game_industry-expanded", true);
        Collections.shuffle(rows);
        // The header is inserted after shuffling so that it stays on the first line.
        rows.add(0, "system,application,domain,instance,text");

        String outfile = "working/cf_eval1_" + APP + "2.csv";
        for (String row : rows) {
            System.out.println(row);
        }

        PrintWriter pw = new PrintWriter(outfile, "UTF-8");
        try {
            for (String row : rows) {
                pw.println(row);
            }
            // PrintWriter never throws on write failures; checkError() flushes and
            // reports them, so a truncated file does not go unnoticed.
            if (pw.checkError()) {
                throw new java.io.IOException("Failed writing " + outfile);
            }
        } finally {
            pw.close();
        }
    }

    /**
     * Appends one row per system variant for every selected instance of a domain.
     *
     * @param rows       list that receives the generated CSV rows
     * @param app        application name written to each row
     * @param domain     human-readable domain name written to each row
     * @param domainDir  base directory name of the domain (system suffixes are appended to it)
     * @param includeNtl whether to also add a row for the {@code "-ntl"} variant
     */
    private static void addToRows(List<String> rows, String app, String domain, String domainDir, boolean includeNtl) {
        for (String instance : chooseBestInstances(domainDir)) {
            rows.add(buildRow(app, domain, domainDir, instance, ""));
            rows.add(buildRow(app, domain, domainDir, instance, "-ndm"));
            rows.add(buildRow(app, domain, domainDir, instance, "-npr"));
            if (includeNtl) {
                rows.add(buildRow(app, domain, domainDir, instance, "-ntl"));
            }
            rows.add(buildRow(app, domain, domainDir, instance, BASELINE_SYSTEM));
        }
    }

    /**
     * Builds a single CSV row: system name, application, domain, quoted instance
     * name and quoted HTML-formatted text.
     *
     * <p>The text is read from the file
     * {@code <PRE_GEN_ROOT><domainDir><system>/<normalized instance>}, where the
     * instance is lower-cased, spaces become underscores and every character other
     * than {@code a-z}, {@code 0-9} and {@code _} is removed.
     *
     * @param app       application name
     * @param domain    human-readable domain name
     * @param domainDir base directory name of the domain
     * @param instance  raw instance identifier
     * @param system    system variant suffix (empty string for the full system)
     * @return the CSV row, without a line terminator
     */
    private static String buildRow(String app, String domain, String domainDir, String instance, String system) {
        String fileName = instance.toLowerCase().replace(" ", "_").replaceAll("[^a-z0-9\\_]", "");
        String file = PRE_GEN_ROOT + domainDir + system + "/" + fileName;
        String text = prepareTextForCsv(readLines(file));
        // The instance name sits inside a double-quoted field just like the text,
        // so it gets the same quote sanitising to keep the row well-formed.
        String instanceName = fixForCsv(makeInstanceName(instance));
        return makeSystemName(system) + "," + app + "," + domain + ",\"" + instanceName + "\",\"" + text + "\"";
    }

    /**
     * Reads all lines of a UTF-8 encoded text file.
     *
     * @param file path of the file to read
     * @return the file's lines, in order
     * @throws RuntimeException wrapping any failure to open or read the file
     */
    private static List<String> readLines(String file) {
        List<String> lines = new ArrayList<String>();
        BufferedReader reader = null;
        try {
            reader = new BufferedReader(new InputStreamReader(new FileInputStream(file), "UTF-8"));
            // readLine() returning null is the reliable end-of-stream signal;
            // ready() only says whether a read would block.
            String line;
            while ((line = reader.readLine()) != null) {
                lines.add(line);
            }
        }
        catch (Exception e) {
            throw new RuntimeException("Failed to read " + file, e);
        }
        finally {
            if (reader != null) {
                try {
                    reader.close();
                }
                catch (Exception ignored) {
                    // Nothing useful can be done about a failed close of a read-only stream.
                }
            }
        }
        return lines;
    }

    /**
     * Derives the instance name shown in the CSV: everything from the first
     * {@code " ("} onwards is dropped, then the remainder is URL-decoded as UTF-8.
     *
     * @param text raw instance identifier
     * @return the decoded display name
     * @throws RuntimeException wrapping any decoding failure
     */
    private static String makeInstanceName(String text) {
        int parenthesis = text.indexOf(" (");
        if (parenthesis >= 0) {
            text = text.substring(0, parenthesis);
        }
        try {
            return URLDecoder.decode(text, "UTF-8");
        }
        catch (Exception e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Joins text lines into an HTML fragment suitable for a quoted CSV field.
     *
     * <p>Lines containing {@code "active years"} are dropped. Blank lines separate
     * paragraphs ({@code </p><p>}); consecutive non-blank lines within a paragraph
     * are joined with {@code <br>}. Blank lines before the first content line do
     * not open an extra, empty paragraph.
     *
     * @param lines the raw text lines
     * @return the HTML fragment, always wrapped in {@code <p>...</p>}
     */
    private static String prepareTextForCsv(List<String> lines) {
        StringBuilder text = new StringBuilder("<p>");
        boolean newParagraph = true;
        boolean hasContent = false;
        for (String line : lines) {
            if (line.contains("active years")) {
                continue;
            }
            if (line.trim().isEmpty()) {
                newParagraph = true;
                continue;
            }
            // A separator is only needed once something has been written; this keeps
            // leading blank lines from producing an empty "<p></p>".
            if (hasContent) {
                text.append(newParagraph ? "</p><p>" : "<br>");
            }
            text.append(fixForCsv(line));
            newParagraph = false;
            hasContent = true;
        }
        return text.append("</p>").toString();
    }

    /**
     * Makes a string safe to embed in a double-quoted CSV field by replacing each
     * double quote with two single quotes.
     *
     * @param line the text to sanitise
     * @return the text without double quotes
     */
    private static String fixForCsv(String line) {
        // Literal replacement; no regular expression is needed.
        return line.replace("\"", "''");
    }

    /**
     * Maps a system directory suffix to the name written in the CSV: the empty
     * suffix becomes {@code "full"}, {@link #BASELINE_SYSTEM} becomes
     * {@code "baseline"}, and any other suffix loses its leading character.
     *
     * @param system system variant suffix
     * @return the system name for the CSV
     */
    private static String makeSystemName(String system) {
        if (system.isEmpty()) {
            return "full";
        }
        if (system.equals(BASELINE_SYSTEM)) {
            return "baseline";
        }
        return system.substring(1);
    }

    /**
     * Selects up to {@link #INSTANCES_PER_DOMAIN} entities of a domain, ordered by
     * descending size of the value that the triples map holds for each entity
     * (presumably the amount of RDF data per entity; confirm against
     * {@code MultiMap3}).
     *
     * @param domainName domain whose {@code rdf_triples} file is read
     * @return the top entities; fewer than {@link #INSTANCES_PER_DOMAIN} if the
     *         domain does not contain that many
     */
    private static Collection<String> chooseBestInstances(String domainName) {
        String rdfTriplesFile = new File(PreGenEnv.getSubjectDomainCorpusDir(domainName), "rdf_triples").getAbsolutePath();
        final MultiMap3<String, String, String> triples = RdfUtils.readRdfTriples(rdfTriplesFile);
        List<String> orderedEntities = new ArrayList<String>(triples.keySet());
        Collections.sort(orderedEntities, new Comparator<String>(){

            @Override
            public int compare(String o1, String o2) {
                // Descending order. Sizes are non-negative, so the difference cannot overflow.
                return triples.get(o2).size() - triples.get(o1).size();
            }
        });
        // Clamp the upper bound so a small domain does not throw IndexOutOfBoundsException.
        return orderedEntities.subList(0, Math.min(INSTANCES_PER_DOMAIN, orderedEntities.size()));
    }
}

