/*
 * Decompiled with CFR 0.152.
 */
package com.aliasi.corpus;

import com.aliasi.chunk.Chunk;
import com.aliasi.chunk.Chunking;
import com.aliasi.corpus.ChunkHandler;
import com.aliasi.corpus.TagHandler;
import com.aliasi.tokenizer.Tokenizer;
import com.aliasi.tokenizer.TokenizerFactory;
import com.aliasi.util.Strings;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Set;

/**
 * Adapts a {@link TagHandler} so that it can be used wherever a
 * {@link ChunkHandler} is expected.
 *
 * <p>Each chunking passed to {@link #handle(Chunking)} is converted into
 * three parallel arrays (tokens, whitespaces and tags) using the tokenizer
 * factory supplied at construction, and those arrays are forwarded to the
 * wrapped tag handler.  Tokens outside of any chunk are tagged {@code "O"};
 * the first token of a chunk of type {@code T} is tagged {@code "B-T"} and
 * the remaining tokens of that chunk are tagged {@code "I-T"}.
 *
 * <p>The whitespace array always has exactly one more element than the
 * token array: the whitespace before the first token, the whitespace
 * between each pair of adjacent tokens, and the whitespace after the last
 * token.
 */
public class ChunkHandlerAdapter
implements ChunkHandler {
    private final TokenizerFactory mTokenizerFactory;
    private TagHandler mTagHandler;
    private boolean mValidateTokenizer;

    /**
     * Creates an adapter that forwards to the given tag handler.
     *
     * @param tagHandler handler that receives the converted taggings
     * @param tokenizerFactory factory used to tokenize chunking text
     * @param validateTokenizer whether {@link #handle(Chunking)} should
     *     verify that the produced tokens and whitespaces are reproduced
     *     by re-tokenizing their concatenation
     */
    public ChunkHandlerAdapter(TagHandler tagHandler, TokenizerFactory tokenizerFactory, boolean validateTokenizer) {
        this(tokenizerFactory, validateTokenizer);
        this.mTagHandler = tagHandler;
    }

    /**
     * Creates an adapter with no tag handler.  A handler must be supplied
     * through {@link #setTagHandler(TagHandler)} before
     * {@link #handle(Chunking)} is called.
     *
     * @param tokenizerFactory factory used to tokenize chunking text
     * @param validateTokenizer whether {@link #handle(Chunking)} should
     *     verify that the produced tokens and whitespaces are reproduced
     *     by re-tokenizing their concatenation
     */
    public ChunkHandlerAdapter(TokenizerFactory tokenizerFactory, boolean validateTokenizer) {
        this.mTokenizerFactory = tokenizerFactory;
        this.mValidateTokenizer = validateTokenizer;
    }

    /**
     * Sets the tag handler that receives converted chunkings.
     *
     * @param tagHandler the new tag handler
     */
    public void setTagHandler(TagHandler tagHandler) {
        this.mTagHandler = tagHandler;
    }

    /**
     * Turns tokenizer validation in {@link #handle(Chunking)} on or off.
     *
     * @param validateTokenizer {@code true} to validate, {@code false} to skip
     */
    public void setValidateTokenizer(boolean validateTokenizer) {
        this.mValidateTokenizer = validateTokenizer;
    }

    /**
     * Converts the chunking to tokens, whitespaces and tags and passes
     * them to the wrapped tag handler.
     *
     * @param chunking the chunking to convert
     * @throws IllegalStateException if no tag handler has been set
     * @throws IllegalArgumentException if validation is enabled and the
     *     produced tokens are not consistent with the tokenizer factory,
     *     or if a chunk contains no tokens
     */
    public void handle(Chunking chunking) {
        // Fail fast with a clear message instead of a bare NPE after all
        // of the conversion work has already been done.
        if (this.mTagHandler == null) {
            throw new IllegalStateException("Tag handler must be set before handling chunkings.");
        }
        ArrayList<String> tokenList = new ArrayList<String>();
        ArrayList<String> whiteList = new ArrayList<String>();
        ArrayList<String> tagList = new ArrayList<String>();
        ChunkHandlerAdapter.tagChunking(chunking, this.mTokenizerFactory, tokenList, whiteList, tagList);
        String[] toks = tokenList.toArray(new String[tokenList.size()]);
        String[] whites = whiteList.toArray(new String[whiteList.size()]);
        String[] tags = tagList.toArray(new String[tagList.size()]);
        if (this.mValidateTokenizer && !ChunkHandlerAdapter.consistentTokens(toks, whites, this.mTokenizerFactory)) {
            String msg = "Tokens not consistent with tokenizer factory. Tokens=" + Arrays.asList(toks) + " Tokenization=" + this.tokenization(toks, whites) + " Factory class=" + this.mTokenizerFactory.getClass();
            throw new IllegalArgumentException(msg);
        }
        this.mTagHandler.handle(toks, whites, tags);
    }

    /**
     * Returns the tags produced for the given chunking, one per token, in
     * text order.
     *
     * @param chunking the chunking to convert
     * @param factory factory used to tokenize the chunking's text
     * @return the tag for each token
     * @throws IllegalArgumentException if a chunk contains no tokens
     */
    public static String[] toTags(Chunking chunking, TokenizerFactory factory) {
        ArrayList<String> tokenList = new ArrayList<String>();
        ArrayList<String> whiteList = new ArrayList<String>();
        ArrayList<String> tagList = new ArrayList<String>();
        ChunkHandlerAdapter.tagChunking(chunking, factory, tokenList, whiteList, tagList);
        return tagList.toArray(new String[tagList.size()]);
    }

    /**
     * Tests whether tokenizing the concatenation of the given tokens and
     * whitespaces with the given factory yields exactly those tokens and
     * whitespaces, in order.
     *
     * @param toks the tokens
     * @param whitespaces the whitespaces; must contain one more element
     *     than {@code toks} for the result to be {@code true}
     * @param tokenizerFactory factory used to re-tokenize
     * @return {@code true} if the re-tokenization matches, {@code false}
     *     otherwise (including when the array lengths do not line up)
     */
    public static boolean consistentTokens(String[] toks, String[] whitespaces, TokenizerFactory tokenizerFactory) {
        if (toks.length + 1 != whitespaces.length) {
            return false;
        }
        char[] cs = ChunkHandlerAdapter.getChars(toks, whitespaces);
        Tokenizer tokenizer = tokenizerFactory.tokenizer(cs, 0, cs.length);
        if (!whitespaces[0].equals(tokenizer.nextWhitespace())) {
            return false;
        }
        for (int i = 0; i < toks.length; ++i) {
            String token = tokenizer.nextToken();
            // A null token means the tokenizer ran out before we did.
            if (token == null || !toks[i].equals(token)) {
                return false;
            }
            if (!whitespaces[i + 1].equals(tokenizer.nextWhitespace())) {
                return false;
            }
        }
        return true;
    }

    /**
     * Walks the chunks of a chunking in text order and appends the
     * tokens, whitespaces and tags for the whole character sequence to
     * the supplied lists.  Shared by {@link #handle(Chunking)} and
     * {@link #toTags(Chunking, TokenizerFactory)}.
     */
    private static void tagChunking(Chunking chunking, TokenizerFactory factory, ArrayList<String> tokenList, ArrayList<String> whiteList, ArrayList<String> tagList) {
        CharSequence cSeq = chunking.charSequence();
        char[] cs = Strings.toCharArray(cSeq);
        Set<Chunk> chunkSet = chunking.chunkSet();
        Chunk[] chunks = chunkSet.toArray(new Chunk[chunkSet.size()]);
        Arrays.sort(chunks, Chunk.TEXT_ORDER_COMPARATOR);
        int pos = 0;
        for (Chunk nextChunk : chunks) {
            int start = nextChunk.start();
            int end = nextChunk.end();
            // Text between the previous chunk (or the start) and this chunk.
            ChunkHandlerAdapter.outTag(cs, pos, start, tokenList, whiteList, tagList, factory);
            ChunkHandlerAdapter.chunkTag(cs, start, end, nextChunk.type(), tokenList, whiteList, tagList, factory);
            pos = end;
        }
        // Text after the last chunk; also supplies the final whitespace.
        ChunkHandlerAdapter.outTag(cs, pos, cSeq.length(), tokenList, whiteList, tagList, factory);
    }

    /**
     * Tokenizes the slice {@code cs[start..end)} as text outside of any
     * chunk.  Appends the leading whitespace, then for every token the
     * token itself, the tag {@code "O"} and the whitespace following it.
     *
     * @param cs underlying characters
     * @param start index of the first character of the slice
     * @param end index one past the last character of the slice
     * @param tokenList list to which tokens are appended
     * @param whiteList list to which whitespaces are appended
     * @param tagList list to which tags are appended
     * @param factory factory used to tokenize the slice
     */
    static void outTag(char[] cs, int start, int end, ArrayList<String> tokenList, ArrayList<String> whiteList, ArrayList<String> tagList, TokenizerFactory factory) {
        Tokenizer tokenizer = factory.tokenizer(cs, start, end - start);
        whiteList.add(tokenizer.nextWhitespace());
        String nextToken;
        while ((nextToken = tokenizer.nextToken()) != null) {
            tokenList.add(nextToken);
            tagList.add("O");
            whiteList.add(tokenizer.nextWhitespace());
        }
    }

    /**
     * Tokenizes the slice {@code cs[start..end)} as a chunk of the given
     * type.  The first token is tagged {@code "B-" + type} and every
     * later token {@code "I-" + type}.  Only the whitespace between
     * tokens of the chunk is appended; the whitespace preceding the first
     * token and following the last token is left to the surrounding
     * {@link #outTag} calls.
     *
     * @param cs underlying characters
     * @param start index of the first character of the chunk
     * @param end index one past the last character of the chunk
     * @param type the chunk type
     * @param tokenList list to which tokens are appended
     * @param whiteList list to which whitespaces are appended
     * @param tagList list to which tags are appended
     * @param factory factory used to tokenize the slice
     * @throws IllegalArgumentException if the slice contains no tokens
     */
    static void chunkTag(char[] cs, int start, int end, String type, ArrayList<String> tokenList, ArrayList<String> whiteList, ArrayList<String> tagList, TokenizerFactory factory) {
        Tokenizer tokenizer = factory.tokenizer(cs, start, end - start);
        String firstToken = tokenizer.nextToken();
        if (firstToken == null) {
            // Previously a null token was silently recorded, which later
            // surfaced as a NullPointerException during validation or as
            // a null entry handed to the tag handler.
            throw new IllegalArgumentException("Chunk contains no tokens. Type=" + type + " start=" + start + " end=" + end);
        }
        tokenList.add(firstToken);
        tagList.add("B-" + type);
        while (true) {
            // Read the whitespace first but only keep it if another
            // token follows; trailing whitespace in the chunk is dropped.
            String nextWhitespace = tokenizer.nextWhitespace();
            String nextToken = tokenizer.nextToken();
            if (nextToken == null) {
                break;
            }
            tokenList.add(nextToken);
            whiteList.add(nextWhitespace);
            tagList.add("I-" + type);
        }
    }

    /**
     * Re-tokenizes the concatenation of the given tokens and whitespaces
     * with this adapter's tokenizer factory and returns the resulting
     * tokens.  Used to build the diagnostic message when validation fails.
     *
     * @param toks the tokens
     * @param whitespaces the whitespaces surrounding the tokens
     * @return the tokens produced by the tokenizer factory
     */
    List<String> tokenization(String[] toks, String[] whitespaces) {
        ArrayList<String> tokList = new ArrayList<String>();
        // The whitespaces are required by the tokenize call but not reported.
        ArrayList<String> whiteList = new ArrayList<String>();
        char[] cs = ChunkHandlerAdapter.getChars(toks, whitespaces);
        Tokenizer tokenizer = this.mTokenizerFactory.tokenizer(cs, 0, cs.length);
        tokenizer.tokenize(tokList, whiteList);
        return tokList;
    }

    /**
     * Interleaves whitespaces and tokens into a single character array:
     * {@code whitespaces[0], toks[0], whitespaces[1], toks[1], ...},
     * finishing with the last element of {@code whitespaces}.
     *
     * @param toks the tokens
     * @param whitespaces the whitespaces; must be non-empty and contain
     *     at least as many elements as {@code toks}
     * @return the concatenated characters
     */
    static char[] getChars(String[] toks, String[] whitespaces) {
        StringBuilder sb = new StringBuilder();
        for (int i = 0; i < toks.length; ++i) {
            sb.append(whitespaces[i]);
            sb.append(toks[i]);
        }
        sb.append(whitespaces[whitespaces.length - 1]);
        return Strings.toCharArray(sb);
    }
}

