/*
 * Decompiled with CFR 0.152.
 */
package com.aliasi.tokenizer;

import com.aliasi.tokenizer.FilterTokenizer;
import com.aliasi.tokenizer.Tokenizer;
import com.aliasi.util.Strings;

/**
 * A tokenizer filter whose {@link #filter(String)} replaces a token with its
 * four-character Soundex encoding.
 *
 * <p>Only characters in the range {@code 0..255} take part in the encoding;
 * every other character is skipped. Encoding is driven by three lookup tables
 * indexed by character value, built once when the class is initialized.
 */
public class SoundexFilterTokenizer
extends FilterTokenizer {
    /** Number of entries in each lookup table (character values 0..255). */
    private static final int TABLE_SIZE = 256;

    /** Number of characters in every encoding returned by this class. */
    private static final int ENCODING_LENGTH = 4;

    /**
     * Value assigned to {@code lastCode} after a vowel. {@link #soundexCode(char)}
     * never returns it, so the consonant following a vowel is always emitted,
     * even if it has the same code as the consonant before the vowel.
     */
    private static final char VOWEL_SEPARATOR = '7';

    /**
     * Sentinel stored in {@link #INITIAL_CODES} and {@link #CODES} for
     * characters that have no code. Declared {@code final} so that the
     * tables and the encoder can never disagree about its value.
     */
    static final char NON_CHAR_CODE = '\u00ff';

    /**
     * For each character value: the character used as the first symbol of an
     * encoding, or {@link #NON_CHAR_CODE} if the character is not a letter.
     */
    static final char[] INITIAL_CODES = new char[TABLE_SIZE];

    /**
     * For each character value: its Soundex digit, or {@link #NON_CHAR_CODE}
     * if it is not a letter or is a letter without a digit.
     */
    static final char[] CODES = new char[TABLE_SIZE];

    /** For each character value: whether its initial code is A, E, I, O or U. */
    static final boolean[] VOWELS = new boolean[TABLE_SIZE];

    /**
     * Constructs a Soundex filter over the specified tokenizer.
     *
     * @param tokenizer the tokenizer handed to the {@code FilterTokenizer}
     *     superclass constructor
     */
    public SoundexFilterTokenizer(Tokenizer tokenizer) {
        super(tokenizer);
    }

    /**
     * Returns the Soundex encoding of the specified token; delegates to
     * {@link #soundexEncoding(String)}.
     *
     * @param token the token to encode
     * @return the four-character encoding of {@code token}
     */
    public String filter(String token) {
        return SoundexFilterTokenizer.soundexEncoding(token);
    }

    /**
     * Returns the four-character Soundex encoding of the specified token.
     *
     * <p>The first character of the result is the initial code of the first
     * character of the token that has one. The remaining positions hold the
     * digits of the following characters, where a character whose digit
     * equals the previously retained digit is dropped unless a vowel occurred
     * in between. The result is right-padded with {@code '0'}. If no
     * character of the token has an initial code (including the empty token),
     * the result is {@code "0000"}.
     *
     * @param token the token to encode
     * @return the four-character encoding
     * @throws NullPointerException if {@code token} is {@code null}
     */
    public static String soundexEncoding(String token) {
        int length = token.length();

        // Locate the first character that has an initial code.
        int pos = 0;
        while (pos < length) {
            char c = token.charAt(pos);
            if (c < TABLE_SIZE && INITIAL_CODES[c] != NON_CHAR_CODE) {
                break;
            }
            ++pos;
        }
        if (pos == length) {
            return "0000";
        }

        // Start fully padded; coded positions are overwritten below.
        char[] cs = { '0', '0', '0', '0' };
        char first = token.charAt(pos);
        ++pos;
        cs[0] = INITIAL_CODES[first];
        // The first character's own digit suppresses an immediately repeated digit.
        char lastCode = CODES[first];

        int csPos = 1;
        while (csPos < ENCODING_LENGTH && pos < length) {
            char c = token.charAt(pos);
            ++pos;
            if (c >= TABLE_SIZE) {
                // Outside the tables: ignored, and does not separate equal digits.
                continue;
            }
            char code = CODES[c];
            if (code == NON_CHAR_CODE) {
                // Only vowels reset the duplicate check; every other uncoded
                // character leaves lastCode untouched.
                if (VOWELS[c]) {
                    lastCode = VOWEL_SEPARATOR;
                }
                continue;
            }
            if (code != lastCode) {
                cs[csPos] = code;
                ++csPos;
                lastCode = code;
            }
        }
        return new String(cs);
    }

    /**
     * Returns the Soundex digit for the specified letter.
     *
     * @param upperCaseLetter the letter to look up; only the upper-case ASCII
     *     consonants listed in the switch have a digit
     * @return a digit character {@code '1'} through {@code '6'}, or
     *     {@link #NON_CHAR_CODE} for any other character
     */
    static char soundexCode(char upperCaseLetter) {
        switch (upperCaseLetter) {
            case 'B':
            case 'F':
            case 'P':
            case 'V':
                return '1';
            case 'C':
            case 'G':
            case 'J':
            case 'K':
            case 'Q':
            case 'S':
            case 'X':
            case 'Z':
                return '2';
            case 'D':
            case 'T':
                return '3';
            case 'L':
                return '4';
            case 'M':
            case 'N':
                return '5';
            case 'R':
                return '6';
            default:
                return NON_CHAR_CODE;
        }
    }

    // Populate the lookup tables for every character value 0..255.
    static {
        for (int i = 0; i < TABLE_SIZE; ++i) {
            char c = (char) i;
            if (!Character.isLetter(c)) {
                INITIAL_CODES[i] = NON_CHAR_CODE;
                CODES[i] = NON_CHAR_CODE;
                // VOWELS[i] keeps its default of false.
                continue;
            }
            // Letters are normalized through Strings.deAccentLatin1 (presumably
            // removing Latin-1 diacritics; confirm against that helper) and
            // upper-cased before being looked up.
            char initial = Character.toUpperCase(Strings.deAccentLatin1(c));
            INITIAL_CODES[i] = initial;
            CODES[i] = soundexCode(initial);
            VOWELS[i] = initial == 'A' || initial == 'E' || initial == 'I'
                    || initial == 'O' || initial == 'U';
        }
    }
}

