/*
 * Decompiled with CFR 0.152.
 */
package com.aliasi.chunk;

import com.aliasi.chunk.Chunking;
import com.aliasi.chunk.HmmChunker;
import com.aliasi.corpus.ChunkHandler;
import com.aliasi.corpus.ChunkHandlerAdapter;
import com.aliasi.corpus.TagHandler;
import com.aliasi.hmm.AbstractHmmEstimator;
import com.aliasi.hmm.HiddenMarkovModel;
import com.aliasi.hmm.HmmDecoder;
import com.aliasi.symbol.SymbolTable;
import com.aliasi.tokenizer.Tokenizer;
import com.aliasi.tokenizer.TokenizerFactory;
import com.aliasi.util.AbstractExternalizable;
import com.aliasi.util.Compilable;
import com.aliasi.util.Strings;
import java.io.IOException;
import java.io.ObjectInput;
import java.io.ObjectOutput;
import java.util.HashSet;
import java.util.Iterator;
import java.util.TreeSet;

public class CharLmHmmChunker
extends HmmChunker
implements Compilable,
ChunkHandler,
TagHandler {
    private final AbstractHmmEstimator mHmmEstimator;
    private final TokenizerFactory mTokenizerFactory;
    private final HashSet mTagSet = new HashSet();
    private final boolean mSmoothTags;

    public CharLmHmmChunker(TokenizerFactory tokenizerFactory, AbstractHmmEstimator hmmEstimator) {
        this(tokenizerFactory, hmmEstimator, false);
    }

    public CharLmHmmChunker(TokenizerFactory tokenizerFactory, AbstractHmmEstimator hmmEstimator, boolean smoothTags) {
        super(tokenizerFactory, new HmmDecoder(hmmEstimator));
        this.mHmmEstimator = hmmEstimator;
        this.mTokenizerFactory = tokenizerFactory;
        this.mSmoothTags = smoothTags;
        this.smoothBoundaries();
    }

    public AbstractHmmEstimator getHmmEstimator() {
        return this.mHmmEstimator;
    }

    public TokenizerFactory getTokenizerFactory() {
        return this.mTokenizerFactory;
    }

    public void trainDictionary(CharSequence cSeq, String type) {
        char[] cs = Strings.toCharArray(cSeq);
        Tokenizer tokenizer = this.getTokenizerFactory().tokenizer(cs, 0, cs.length);
        String[] tokens = tokenizer.tokenize();
        if (tokens.length < 1) {
            String msg = "Did not find any tokens in entry.Char sequence=" + cSeq;
            throw new IllegalArgumentException(msg);
        }
        AbstractHmmEstimator estimator = this.getHmmEstimator();
        SymbolTable table = estimator.stateSymbolTable();
        this.smoothBaseTag(type, table, estimator);
        if (tokens.length == 1) {
            estimator.trainEmit("W_" + type, tokens[0]);
            return;
        }
        String initialTag = "B_" + type;
        estimator.trainEmit(initialTag, tokens[0]);
        String prevTag = initialTag;
        int i = 1;
        while (i + 1 < tokens.length) {
            String tag = "M_" + type;
            estimator.trainEmit(tag, tokens[i]);
            estimator.trainTransit(prevTag, tag);
            prevTag = tag;
            ++i;
        }
        String finalTag = "E_" + type;
        estimator.trainEmit(finalTag, tokens[tokens.length - 1]);
        estimator.trainTransit(prevTag, finalTag);
    }

    public void handle(Chunking chunking) {
        ChunkHandlerAdapter adapter = new ChunkHandlerAdapter(this, this.getTokenizerFactory(), false);
        adapter.handle(chunking);
    }

    public void handle(String[] tokens, String[] whitespaces, String[] tags) {
        this.getHmmEstimator().handle(tokens, whitespaces, CharLmHmmChunker.trainNormalize(tags));
        this.smoothTags(tags);
    }

    public void compileTo(ObjectOutput objOut) throws IOException {
        if (!(this.mTokenizerFactory instanceof Compilable)) {
            String msg = "Tokenizer factory must implement class= com.aliasi.util.Compilable  Found class=" + this.mTokenizerFactory.getClass();
            throw new IllegalArgumentException(msg);
        }
        objOut.writeObject(new Externalizer(this));
    }

    public String toString() {
        StringBuilder sb = new StringBuilder();
        TreeSet<String> expandedTagSet = new TreeSet<String>();
        expandedTagSet.add("MM_O");
        expandedTagSet.add("WW_O_BOS");
        expandedTagSet.add("BB_O_BOS");
        expandedTagSet.add("EE_O_BOS");
        for (Object e : this.mTagSet) {
            String x = e.toString();
            expandedTagSet.add("B_" + x);
            expandedTagSet.add("M_" + x);
            expandedTagSet.add("E_" + x);
            expandedTagSet.add("W_" + x);
            expandedTagSet.add("BB_O_" + x);
            expandedTagSet.add("EE_O_" + x);
            expandedTagSet.add("WW_O_" + x);
        }
        for (Object e : expandedTagSet) {
            String tag0 = e.toString();
            sb.append("\n");
            sb.append("start(" + tag0 + ")=" + this.mHmmEstimator.startLog2Prob(tag0));
            sb.append("\n");
            sb.append("  end(" + tag0 + ")=" + this.mHmmEstimator.endLog2Prob(tag0));
            sb.append("\n");
            for (Object e2 : expandedTagSet) {
                String tag1 = e2.toString();
                sb.append("trans(" + tag0 + "," + tag1 + ")=" + this.mHmmEstimator.transitLog2Prob(tag0, tag1));
                sb.append("\n");
            }
        }
        return sb.toString();
    }

    void smoothBoundaries() {
        AbstractHmmEstimator hmmEstimator = this.getHmmEstimator();
        SymbolTable table = hmmEstimator.stateSymbolTable();
        String bbO = "BB_O_BOS";
        String mmO = "MM_O";
        String eeO = "EE_O_BOS";
        String wwO = "WW_O_BOS";
        table.getOrAddSymbol(bbO);
        table.getOrAddSymbol(mmO);
        table.getOrAddSymbol(eeO);
        table.getOrAddSymbol(wwO);
        hmmEstimator.trainStart(bbO);
        hmmEstimator.trainStart(wwO);
        hmmEstimator.trainEnd(eeO);
        hmmEstimator.trainEnd(wwO);
        hmmEstimator.trainTransit(bbO, mmO);
        hmmEstimator.trainTransit(bbO, eeO);
        hmmEstimator.trainTransit(mmO, mmO);
        hmmEstimator.trainTransit(mmO, eeO);
    }

    void smoothTags(String[] tags) {
        if (!this.mSmoothTags) {
            return;
        }
        AbstractHmmEstimator hmmEstimator = this.getHmmEstimator();
        SymbolTable table = hmmEstimator.stateSymbolTable();
        for (int i = 0; i < tags.length; ++i) {
            this.smoothTag(tags[i], table, hmmEstimator);
        }
    }

    void smoothTag(String tag, SymbolTable table, AbstractHmmEstimator hmmEstimator) {
        this.smoothBaseTag(HmmChunker.baseTag(tag), table, hmmEstimator);
    }

    void smoothBaseTag(String baseTag, SymbolTable table, AbstractHmmEstimator hmmEstimator) {
        if (!this.mTagSet.add(baseTag)) {
            return;
        }
        if ("O".equals(baseTag)) {
            return;
        }
        String b_x = "B_" + baseTag;
        String m_x = "M_" + baseTag;
        String e_x = "E_" + baseTag;
        String w_x = "W_" + baseTag;
        String bb_o_x = "BB_O_" + baseTag;
        String ee_o_x = "EE_O_" + baseTag;
        String ww_o_x = "WW_O_" + baseTag;
        table.getOrAddSymbol(b_x);
        table.getOrAddSymbol(m_x);
        table.getOrAddSymbol(e_x);
        table.getOrAddSymbol(w_x);
        table.getOrAddSymbol(bb_o_x);
        table.getOrAddSymbol(ee_o_x);
        table.getOrAddSymbol(ww_o_x);
        hmmEstimator.trainStart(b_x);
        hmmEstimator.trainTransit(b_x, m_x);
        hmmEstimator.trainTransit(b_x, e_x);
        hmmEstimator.trainTransit(m_x, m_x);
        hmmEstimator.trainTransit(m_x, e_x);
        hmmEstimator.trainEnd(e_x);
        hmmEstimator.trainTransit(e_x, bb_o_x);
        hmmEstimator.trainStart(w_x);
        hmmEstimator.trainEnd(w_x);
        hmmEstimator.trainTransit(w_x, bb_o_x);
        hmmEstimator.trainTransit(bb_o_x, "MM_O");
        hmmEstimator.trainTransit("MM_O", ee_o_x);
        hmmEstimator.trainTransit(ee_o_x, b_x);
        hmmEstimator.trainTransit(ee_o_x, w_x);
        hmmEstimator.trainStart(ww_o_x);
        hmmEstimator.trainTransit(ww_o_x, b_x);
        hmmEstimator.trainTransit(ww_o_x, w_x);
        hmmEstimator.trainTransit(e_x, "WW_O_BOS");
        hmmEstimator.trainTransit(w_x, "WW_O_BOS");
        hmmEstimator.trainTransit(bb_o_x, "EE_O_BOS");
        hmmEstimator.trainTransit("BB_O_BOS", ee_o_x);
        Iterator it = this.mTagSet.iterator();
        while (it.hasNext()) {
            String type = it.next().toString();
            if ("O".equals(type) || "BOS".equals(type)) continue;
            String bb_o_y = "BB_O_" + type;
            String ww_o_y = "WW_O_" + type;
            String ee_o_y = "EE_O_" + type;
            String b_y = "B_" + type;
            String w_y = "W_" + type;
            String e_y = "E_" + type;
            hmmEstimator.trainTransit(e_x, ww_o_y);
            hmmEstimator.trainTransit(e_x, b_y);
            hmmEstimator.trainTransit(e_x, w_y);
            hmmEstimator.trainTransit(w_x, ww_o_y);
            hmmEstimator.trainTransit(w_x, b_y);
            hmmEstimator.trainTransit(w_x, w_y);
            hmmEstimator.trainTransit(e_y, b_x);
            hmmEstimator.trainTransit(e_y, w_x);
            hmmEstimator.trainTransit(e_y, ww_o_x);
            hmmEstimator.trainTransit(w_y, b_x);
            hmmEstimator.trainTransit(w_y, w_x);
            hmmEstimator.trainTransit(w_y, ww_o_x);
            hmmEstimator.trainTransit(bb_o_x, ee_o_y);
            hmmEstimator.trainTransit(bb_o_y, ee_o_x);
        }
    }

    static class Externalizer
    extends AbstractExternalizable {
        private static final long serialVersionUID = 4630707998932521821L;
        final CharLmHmmChunker mChunker;

        public Externalizer() {
            this(null);
        }

        public Externalizer(CharLmHmmChunker chunker) {
            this.mChunker = chunker;
        }

        public Object read(ObjectInput in) throws ClassNotFoundException, IOException {
            TokenizerFactory tokenizerFactory = (TokenizerFactory)in.readObject();
            HiddenMarkovModel hmm = (HiddenMarkovModel)in.readObject();
            HmmDecoder decoder = new HmmDecoder(hmm);
            return new HmmChunker(tokenizerFactory, decoder);
        }

        public void writeExternal(ObjectOutput objOut) throws IOException {
            Compilable tokenizerFactory = (Compilable)((Object)this.mChunker.getTokenizerFactory());
            tokenizerFactory.compileTo(objOut);
            this.mChunker.getHmmEstimator().compileTo(objOut);
        }
    }
}

