/*
 * Decompiled with CFR 0.152.
 */
package com.giaybac.traprange;

import com.giaybac.traprange.TrapRangeBuilder;
import com.giaybac.traprange.entity.Table;
import com.giaybac.traprange.entity.TableCell;
import com.giaybac.traprange.entity.TableRow;
import com.google.common.collect.HashMultimap;
import com.google.common.collect.LinkedListMultimap;
import com.google.common.collect.Multimap;
import com.google.common.collect.Range;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStreamWriter;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.Iterator;
import java.util.List;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.text.PDFTextStripper;
import org.apache.pdfbox.text.TextPosition;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Extracts tables from a PDF document.
 *
 * <p>For every selected page the text positions are read with PDFBox, grouped
 * into line (row) ranges along the Y axis, and then split into cells using
 * column ranges along the X axis. The column ranges are computed once from the
 * retained text of <em>all</em> selected pages, so every returned table shares
 * the same column layout.
 *
 * <p>Typical usage:
 * <pre>
 *   List&lt;Table&gt; tables = new PDFTableExtractor()
 *           .setSource("file.pdf")
 *           .addPage(0)
 *           .exceptLine(new int[] {0, -1})
 *           .extract();
 * </pre>
 *
 * <p>Instances hold mutable configuration and per-extraction state; nothing in
 * this class synchronises access to it.
 */
public class PDFTableExtractor {
    private static final Logger LOGGER = LoggerFactory.getLogger(PDFTableExtractor.class);

    /** Page key under which line exclusions that apply to every page are stored. */
    private static final int ALL_PAGES = -1;

    /**
     * Orders text positions left-to-right by their X coordinate.
     */
    private static final Comparator<TextPosition> BY_X = new Comparator<TextPosition>() {
        @Override
        public int compare(TextPosition o1, TextPosition o2) {
            return compareCoordinates(o1.getX(), o2.getX());
        }
    };

    /**
     * Orders text positions top-to-bottom by their Y coordinate.
     */
    private static final Comparator<TextPosition> BY_Y = new Comparator<TextPosition>() {
        @Override
        public int compare(TextPosition o1, TextPosition o2) {
            return compareCoordinates(o1.getY(), o2.getY());
        }
    };

    private final List<Integer> extractedPages = new ArrayList<>();
    private final List<Integer> exceptedPages = new ArrayList<>();
    private final Multimap<Integer, Integer> pageNExceptedLinesMap = HashMultimap.create();
    private InputStream inputStream;
    /** True only when {@link #inputStream} was opened by this class and must be closed by it. */
    private boolean ownsInputStream;
    private PDDocument document;

    /**
     * Uses the given stream as the PDF source. The stream stays owned by the
     * caller: this class never closes it.
     *
     * @param inputStream stream to read the PDF from
     * @return this extractor, for chaining
     */
    public PDFTableExtractor setSource(InputStream inputStream) {
        // A stream opened earlier by setSource(File) would otherwise be dropped while still open.
        this.closeOwnedInputStream();
        this.inputStream = inputStream;
        this.ownsInputStream = false;
        return this;
    }

    /**
     * Uses the given file as the PDF source. The file is opened immediately and
     * the resulting stream is closed by {@link #extract()}.
     *
     * @param file PDF file to read
     * @return this extractor, for chaining
     * @throws RuntimeException if the file cannot be opened for reading
     */
    public PDFTableExtractor setSource(File file) {
        try {
            // The stream is created before any state changes, so a failure leaves the previous source intact.
            this.setSource(new FileInputStream(file));
            this.ownsInputStream = true;
            return this;
        } catch (FileNotFoundException ex) {
            throw new RuntimeException("Invalid pdf file", ex);
        }
    }

    /**
     * Uses the file at the given path as the PDF source.
     *
     * @param filePath path of the PDF file to read
     * @return this extractor, for chaining
     * @throws RuntimeException if the file cannot be opened for reading
     */
    public PDFTableExtractor setSource(String filePath) {
        return this.setSource(new File(filePath));
    }

    /**
     * Selects a page for extraction. When no page is added, every page that is
     * not excepted is extracted.
     *
     * @param pageIdx zero-based page index
     * @return this extractor, for chaining
     */
    public PDFTableExtractor addPage(int pageIdx) {
        this.extractedPages.add(pageIdx);
        return this;
    }

    /**
     * Excludes a page from extraction; exclusion wins over {@link #addPage(int)}.
     *
     * @param pageIdx zero-based page index
     * @return this extractor, for chaining
     */
    public PDFTableExtractor exceptPage(int pageIdx) {
        this.exceptedPages.add(pageIdx);
        return this;
    }

    /**
     * Excludes lines of one page from its table.
     *
     * @param pageIdx  zero-based page index
     * @param lineIdxs line indices to skip; a non-negative value counts from the
     *                 first line of the page, a negative value counts back from
     *                 the last line ({@code -1} is the last line)
     * @return this extractor, for chaining
     */
    public PDFTableExtractor exceptLine(int pageIdx, int[] lineIdxs) {
        for (int lineIdx : lineIdxs) {
            this.pageNExceptedLinesMap.put(pageIdx, lineIdx);
        }
        return this;
    }

    /**
     * Excludes lines on every page.
     *
     * @param lineIdxs line indices to skip, interpreted as in
     *                 {@link #exceptLine(int, int[])}
     * @return this extractor, for chaining
     */
    public PDFTableExtractor exceptLine(int[] lineIdxs) {
        this.exceptLine(ALL_PAGES, lineIdxs);
        return this;
    }

    /**
     * Loads the configured source and builds one table per selected page that
     * has at least one text position left after line exclusion.
     *
     * <p>The loaded document is always closed before returning, as is the
     * source stream when it was opened by {@link #setSource(File)} or
     * {@link #setSource(String)}. A stream handed to
     * {@link #setSource(InputStream)} is left open for the caller.
     *
     * @return the extracted tables, in page order
     * @throws IllegalStateException if no source has been set
     * @throws RuntimeException      if the PDF cannot be read or parsed
     */
    public List<Table> extract() {
        if (this.inputStream == null) {
            throw new IllegalStateException("No pdf source set; call setSource(...) before extract()");
        }
        List<Table> retVal = new ArrayList<>();
        LinkedListMultimap<Integer, Range<Integer>> pageIdNLineRangesMap = LinkedListMultimap.create();
        LinkedListMultimap<Integer, TextPosition> pageIdNTextsMap = LinkedListMultimap.create();
        try {
            this.document = PDDocument.load(this.inputStream);
            int pageCount = this.document.getNumberOfPages();
            for (int pageId = 0; pageId < pageCount; ++pageId) {
                if (!this.isPageSelected(pageId)) {
                    continue;
                }
                List<TextPosition> texts = this.extractTextPositions(pageId);
                List<Range<Integer>> lineRanges = this.getLineRanges(pageId, texts);
                List<TextPosition> textsByLineRanges = this.getTextsByLineRanges(lineRanges, texts);
                pageIdNLineRangesMap.putAll(pageId, lineRanges);
                pageIdNTextsMap.putAll(pageId, textsByLineRanges);
            }
            // Columns are derived from the text of all selected pages together.
            List<Range<Integer>> columnRanges = this.getColumnRanges(pageIdNTextsMap.values());
            // Only pages that contributed at least one text position appear as keys here.
            for (Integer pageId : pageIdNTextsMap.keySet()) {
                Table table = this.buildTable(pageId, pageIdNTextsMap.get(pageId),
                        pageIdNLineRangesMap.get(pageId), columnRanges);
                retVal.add(table);
                LOGGER.debug("Found {} row(s) and {} column(s) of a table in page {}",
                        table.getRows().size(), columnRanges.size(), pageId);
            }
        } catch (IOException ex) {
            throw new RuntimeException("Parse pdf file fail", ex);
        } finally {
            this.closeDocument();
            // NOTE(review): PDFBox 2.x PDDocument.load(InputStream) is understood not to close
            // the stream it reads, so a stream opened by this class has to be closed here.
            this.closeOwnedInputStream();
        }
        return retVal;
    }

    /**
     * Tells whether a page takes part in the extraction: it must not be
     * excepted and, when pages were added explicitly, it must be one of them.
     */
    private boolean isPageSelected(int pageId) {
        return !this.exceptedPages.contains(pageId)
                && (this.extractedPages.isEmpty() || this.extractedPages.contains(pageId));
    }

    /**
     * Closes the current document, if any, and forgets it so that a later
     * failure cannot close the same document twice.
     */
    private void closeDocument() {
        if (this.document == null) {
            return;
        }
        try {
            this.document.close();
        } catch (IOException ex) {
            LOGGER.error("Failed to close pdf document", ex);
        } finally {
            this.document = null;
        }
    }

    /**
     * Closes the source stream when it was opened by this class; streams
     * supplied by the caller are left untouched.
     */
    private void closeOwnedInputStream() {
        if (this.ownsInputStream && this.inputStream != null) {
            try {
                this.inputStream.close();
            } catch (IOException ex) {
                LOGGER.error("Failed to close pdf input stream", ex);
            }
        }
        this.ownsInputStream = false;
    }

    /**
     * Groups the page's text into rows and builds the table.
     *
     * <p>The content is walked in order against the row ranges: text enclosed
     * by the current row range is collected; otherwise the row collected so far
     * (possibly empty) is emitted and the next row range becomes current.
     *
     * @param pageIdx          zero-based page index stored in the table
     * @param tableContent     text positions of the page, expected in the order
     *                         of the row ranges that enclose them
     * @param rowTrapRanges    Y ranges of the rows
     * @param columnTrapRanges X ranges of the columns
     * @return the table built for the page
     */
    private Table buildTable(int pageIdx, List<TextPosition> tableContent, List<Range<Integer>> rowTrapRanges, List<Range<Integer>> columnTrapRanges) {
        Table retVal = new Table(pageIdx, columnTrapRanges.size());
        int idx = 0;
        int rowIdx = 0;
        List<TextPosition> rowContent = new ArrayList<>();
        while (idx < tableContent.size()) {
            TextPosition textPosition = tableContent.get(idx);
            Range<Integer> rowTrapRange = rowTrapRanges.get(rowIdx);
            if (rowTrapRange.encloses(verticalRange(textPosition))) {
                rowContent.add(textPosition);
                ++idx;
            } else {
                // Current row is complete: emit it and retry the same text against the next row.
                retVal.getRows().add(this.buildRow(rowIdx, rowContent, columnTrapRanges));
                rowContent.clear();
                ++rowIdx;
            }
        }
        // Last row: still pending because the loop ends on the final text, not on a row change.
        if (!rowContent.isEmpty() && rowIdx < rowTrapRanges.size()) {
            retVal.getRows().add(this.buildRow(rowIdx, rowContent, columnTrapRanges));
        }
        return retVal;
    }

    /**
     * Splits the text of one row into cells, using the same walk as
     * {@link #buildTable} but along the X axis.
     *
     * @param rowIdx           index stored in the row
     * @param rowContent       text positions of the row; sorted in place by X
     * @param columnTrapRanges X ranges of the columns
     * @return the row with one cell per column visited
     */
    private TableRow buildRow(int rowIdx, List<TextPosition> rowContent, List<Range<Integer>> columnTrapRanges) {
        TableRow retVal = new TableRow(rowIdx);
        Collections.sort(rowContent, BY_X);
        int idx = 0;
        int columnIdx = 0;
        List<TextPosition> cellContent = new ArrayList<>();
        while (idx < rowContent.size()) {
            TextPosition textPosition = rowContent.get(idx);
            Range<Integer> columnTrapRange = columnTrapRanges.get(columnIdx);
            if (columnTrapRange.encloses(horizontalRange(textPosition))) {
                cellContent.add(textPosition);
                ++idx;
            } else {
                // Current cell is complete (possibly empty): emit it and move to the next column.
                retVal.getCells().add(this.buildCell(columnIdx, cellContent));
                cellContent.clear();
                ++columnIdx;
            }
        }
        // Last cell of the row.
        if (!cellContent.isEmpty() && columnIdx < columnTrapRanges.size()) {
            retVal.getCells().add(this.buildCell(columnIdx, cellContent));
        }
        return retVal;
    }

    /**
     * Concatenates the unicode text of the given positions, left to right, into
     * a cell.
     *
     * @param columnIdx   index stored in the cell
     * @param cellContent text positions of the cell; sorted in place by X
     * @return the cell holding the concatenated text
     */
    private TableCell buildCell(int columnIdx, List<TextPosition> cellContent) {
        // buildRow already hands the content over in X order; the stable sort is kept as a safeguard.
        Collections.sort(cellContent, BY_X);
        StringBuilder cellContentBuilder = new StringBuilder();
        for (TextPosition textPosition : cellContent) {
            cellContentBuilder.append(textPosition.getUnicode());
        }
        return new TableCell(columnIdx, cellContentBuilder.toString());
    }

    /**
     * Reads the text positions of one page of the current document, ordered by Y.
     *
     * @param pageId zero-based page index
     * @throws IOException if PDFBox fails to process the page
     */
    private List<TextPosition> extractTextPositions(int pageId) throws IOException {
        TextPositionExtractor extractor = new TextPositionExtractor(this.document, pageId);
        return extractor.extract();
    }

    /**
     * Tells whether a line index is excluded, either for the given page or for
     * all pages.
     */
    private boolean isExceptedLine(int pageIdx, int lineIdx) {
        return this.pageNExceptedLinesMap.containsEntry(pageIdx, lineIdx)
                || this.pageNExceptedLinesMap.containsEntry(ALL_PAGES, lineIdx);
    }

    /**
     * Keeps only the text positions whose vertical extent is enclosed by one of
     * the line ranges.
     *
     * <p>Both lists are walked once in parallel: when the current line range
     * ends above the current text the walk moves to the next line range;
     * text that is not enclosed and not below the current range is dropped.
     *
     * @param lineRanges    retained line ranges
     * @param textPositions text positions of the page
     * @return the retained text positions, in their original order
     */
    private List<TextPosition> getTextsByLineRanges(List<Range<Integer>> lineRanges, List<TextPosition> textPositions) {
        List<TextPosition> retVal = new ArrayList<>();
        int idx = 0;
        int lineIdx = 0;
        while (idx < textPositions.size() && lineIdx < lineRanges.size()) {
            TextPosition textPosition = textPositions.get(idx);
            Range<Integer> textRange = verticalRange(textPosition);
            Range<Integer> lineRange = lineRanges.get(lineIdx);
            if (lineRange.encloses(textRange)) {
                retVal.add(textPosition);
                ++idx;
            } else if (lineRange.upperEndpoint() < textRange.lowerEndpoint()) {
                ++lineIdx;
            } else {
                ++idx;
            }
        }
        return retVal;
    }

    /**
     * Builds the column ranges from the horizontal extent of every given text
     * position.
     */
    private List<Range<Integer>> getColumnRanges(Collection<TextPosition> texts) {
        TrapRangeBuilder rangesBuilder = new TrapRangeBuilder();
        for (TextPosition text : texts) {
            rangesBuilder.addRange(horizontalRange(text));
        }
        return rangesBuilder.build();
    }

    /**
     * Builds the line ranges of a page from the vertical extent of its text
     * positions, then drops the lines excluded for that page.
     */
    private List<Range<Integer>> getLineRanges(int pageId, List<TextPosition> pageContent) {
        TrapRangeBuilder lineTrapRangeBuilder = new TrapRangeBuilder();
        for (TextPosition textPosition : pageContent) {
            lineTrapRangeBuilder.addRange(verticalRange(textPosition));
        }
        return this.removeExceptedLines(pageId, lineTrapRangeBuilder.build());
    }

    /**
     * Returns the line ranges that are not excluded for the page. Each line is
     * checked under its index from the start and under its negative index from
     * the end ({@code lineIdx - size}).
     */
    private List<Range<Integer>> removeExceptedLines(int pageIdx, List<Range<Integer>> lineTrapRanges) {
        List<Range<Integer>> retVal = new ArrayList<>();
        int lineCount = lineTrapRanges.size();
        for (int lineIdx = 0; lineIdx < lineCount; ++lineIdx) {
            boolean excepted = this.isExceptedLine(pageIdx, lineIdx)
                    || this.isExceptedLine(pageIdx, lineIdx - lineCount);
            if (!excepted) {
                retVal.add(lineTrapRanges.get(lineIdx));
            }
        }
        return retVal;
    }

    /**
     * Three-way comparison of two coordinates using plain {@code <} and
     * {@code >}. Deliberately not {@link Float#compare}: values such as
     * {@code -0.0f} and {@code 0.0f} must compare as equal so that the stable
     * sorts keep their original relative order.
     */
    private static int compareCoordinates(float a, float b) {
        if (a < b) {
            return -1;
        }
        if (a > b) {
            return 1;
        }
        return 0;
    }

    /** Closed integer range covering the text from its Y to Y + height (both truncated). */
    private static Range<Integer> verticalRange(TextPosition text) {
        return Range.closed((int) text.getY(), (int) (text.getY() + text.getHeight()));
    }

    /** Closed integer range covering the text from its X to X + width (both truncated). */
    private static Range<Integer> horizontalRange(TextPosition text) {
        return Range.closed((int) text.getX(), (int) (text.getX() + text.getWidth()));
    }

    /**
     * Text stripper that records the {@link TextPosition}s of a single page
     * instead of producing text output.
     */
    private static class TextPositionExtractor
    extends PDFTextStripper {
        private final List<TextPosition> textPositions = new ArrayList<>();
        private final int pageId;

        /**
         * @param document document to read from
         * @param pageId   zero-based index of the page to read
         * @throws IOException if the underlying stripper cannot be created
         */
        private TextPositionExtractor(PDDocument document, int pageId) throws IOException {
            super.setSortByPosition(true);
            this.document = document;
            this.pageId = pageId;
        }

        /**
         * Runs the stripper over exactly one page. The written text is thrown
         * away; the text positions are captured through
         * {@link #writeString(String, List)}.
         *
         * @param pageId zero-based page index (the stripper's page numbers are one-based)
         * @throws IOException if PDFBox fails to process the page
         */
        public void stripPage(int pageId) throws IOException {
            this.setStartPage(pageId + 1);
            this.setEndPage(pageId + 1);
            try (OutputStreamWriter writer = new OutputStreamWriter(new ByteArrayOutputStream())) {
                this.writeText(this.document, writer);
            }
        }

        /** Collects the text positions instead of writing the string. */
        @Override
        protected void writeString(String string, List<TextPosition> textPositions) throws IOException {
            this.textPositions.addAll(textPositions);
        }

        /**
         * Strips the configured page and returns its text positions ordered by Y.
         *
         * @throws IOException if PDFBox fails to process the page
         */
        private List<TextPosition> extract() throws IOException {
            this.stripPage(this.pageId);
            Collections.sort(this.textPositions, BY_Y);
            return this.textPositions;
        }
    }
}

