/*
 * Decompiled with CFR 0.152.
 */
package org.apache.hyracks.storage.am.lsm.invertedindex.tokenizers;

import org.apache.hyracks.storage.am.lsm.invertedindex.fulltext.TokenizerCategory;
import org.apache.hyracks.storage.am.lsm.invertedindex.tokenizers.AbstractUTF8StringBinaryTokenizer;
import org.apache.hyracks.storage.am.lsm.invertedindex.tokenizers.INGramToken;
import org.apache.hyracks.storage.am.lsm.invertedindex.tokenizers.ITokenFactory;
import org.apache.hyracks.storage.am.lsm.invertedindex.tokenizers.TokenizerInfo;
import org.apache.hyracks.util.string.UTF8StringUtil;

/**
 * Tokenizer that walks a UTF-8 encoded string and emits one token per n-gram of
 * {@code gramLength} characters.
 *
 * <p>When {@code usePrePost} is enabled the total number of grams is
 * {@code numChars + gramLength - 1}; the leading and trailing grams are reported to the
 * token with a non-zero number of pre/post characters. When it is disabled only grams that
 * fit entirely inside the string are produced ({@code numChars - gramLength + 1}, or none
 * when the string is shorter than {@code gramLength}).
 *
 * <p>Typical usage is {@link #reset(byte[], int, int)} followed by a
 * {@link #hasNext()} / {@link #next()} loop.
 */
public class NGramUTF8StringBinaryTokenizer extends AbstractUTF8StringBinaryTokenizer {
    /** Number of characters in each gram. */
    private int gramLength;
    /** Whether grams overlapping the start/end of the string are generated. */
    private boolean usePrePost;
    /** Zero-based index of the next gram to be produced by {@link #next()}. */
    private int gramNum;
    /** Total number of grams for the string passed to the last {@code reset}. */
    private int totalGrams;
    /** The inherited token, viewed through its n-gram specific interface. */
    private final INGramToken concreteToken;

    /**
     * Creates an n-gram tokenizer.
     *
     * @param gramLength       number of characters per gram
     * @param usePrePost       whether to also emit grams overlapping the string boundaries
     * @param ignoreTokenCount when {@code true}, every token is reported with a count of 1
     *                         and no duplicate scan is performed
     * @param sourceHasTypeTag whether the serialized string starts with a one-byte type tag
     * @param tokenFactory     forwarded to the superclass constructor
     */
    public NGramUTF8StringBinaryTokenizer(int gramLength, boolean usePrePost, boolean ignoreTokenCount,
            boolean sourceHasTypeTag, ITokenFactory tokenFactory) {
        super(ignoreTokenCount, sourceHasTypeTag, tokenFactory);
        this.gramLength = gramLength;
        this.usePrePost = usePrePost;
        // The declared type of the inherited token is not visible in this file, so the
        // intermediate Object cast is kept to stay valid whatever that type is.
        this.concreteToken = (INGramToken) ((Object) this.token);
    }

    /**
     * @return {@code true} while fewer grams have been produced than were counted by the
     *         last {@link #reset(byte[], int, int)}
     */
    @Override
    public boolean hasNext() {
        return gramNum < totalGrams;
    }

    /**
     * Positions the token on the next gram and advances the tokenizer.
     *
     * <p>The token is reset to start at the current byte index with a length of
     * {@code gramLength} characters. Unless token counts are ignored, a gram without
     * pre/post characters is given a count of one plus the number of earlier positions in
     * the string whose gram matches it case-insensitively.
     */
    @Override
    public void next() {
        int currentTokenStart = byteIndex;
        int numPreChars = 0;
        int numPostChars = 0;
        if (usePrePost) {
            // The first (gramLength - 1) grams have a positive, decreasing pre-char count.
            numPreChars = Math.max(gramLength - gramNum - 1, 0);
            // The trailing grams past (totalGrams - gramLength) have a growing post-char count.
            numPostChars = gramNum > totalGrams - gramLength ? gramLength - totalGrams + gramNum : 0;
        }
        gramNum++;

        concreteToken.setNumPrePostChars(numPreChars, numPostChars);
        if (numPreChars == 0) {
            // Grams with pre-chars all start at the same byte; only move forward by one
            // character once no pre-chars remain.
            byteIndex += UTF8StringUtil.charSize(sentenceBytes, byteIndex);
        }

        int tokenCount = 1;
        if (!ignoreTokenCount && numPreChars == 0 && numPostChars == 0) {
            tokenCount += countEarlierOccurrences(currentTokenStart);
        }

        token.reset(sentenceBytes, currentTokenStart, sentenceEndOffset, gramLength, tokenCount);
    }

    /**
     * Counts the character positions before {@code gramStart} at which a gram equal
     * (ignoring case) to the gram at {@code gramStart} begins.
     */
    private int countEarlierOccurrences(int gramStart) {
        // Skip the optional type tag and the length header to reach the first character.
        int candidate = sentenceStartOffset;
        if (sourceHasTypeTag) {
            candidate++;
        }
        int utfLength = UTF8StringUtil.getUTFLength(sentenceBytes, candidate);
        candidate += UTF8StringUtil.getNumBytesToStoreLength(utfLength);

        int occurrences = 0;
        while (candidate < gramStart) {
            if (sameGramIgnoreCase(gramStart, candidate)) {
                occurrences++;
            }
            candidate += UTF8StringUtil.charSize(sentenceBytes, candidate);
        }
        return occurrences;
    }

    /**
     * Compares {@code gramLength} characters starting at {@code gramStart} with those
     * starting at {@code candidateStart}, ignoring case.
     */
    private boolean sameGramIgnoreCase(int gramStart, int candidateStart) {
        int offset = 0;
        for (int j = 0; j < gramLength; j++) {
            if (Character.toLowerCase(UTF8StringUtil.charAt(sentenceBytes, gramStart + offset))
                    != Character.toLowerCase(UTF8StringUtil.charAt(sentenceBytes, candidateStart + offset))) {
                return false;
            }
            // The shared byte offset is advanced by the size of the candidate's character.
            offset += UTF8StringUtil.charSize(sentenceBytes, candidateStart + offset);
        }
        return true;
    }

    /**
     * Re-targets the tokenizer at a new string and recomputes the number of grams.
     *
     * @param sentenceData buffer holding the serialized string
     * @param start        forwarded to the superclass {@code reset}
     * @param length       forwarded to the superclass {@code reset}
     */
    @Override
    public void reset(byte[] sentenceData, int start, int length) {
        super.reset(sentenceData, start, length);
        gramNum = 0;

        // Count characters by stepping over each UTF-8 encoded character in turn.
        // pos must hold the starting byte index before end is derived from it.
        int numChars = 0;
        int pos = byteIndex;
        int end = pos + sentenceUtf8Length;
        while (pos < end) {
            numChars++;
            pos += UTF8StringUtil.charSize(sentenceData, pos);
        }

        if (usePrePost) {
            totalGrams = numChars + gramLength - 1;
        } else if (numChars >= gramLength) {
            totalGrams = numChars - gramLength + 1;
        } else {
            // String is shorter than a single gram.
            totalGrams = 0;
        }
    }

    /**
     * Changes the gram length. The gram total is only recomputed by the next
     * {@link #reset(byte[], int, int)}.
     *
     * @param gramLength number of characters per gram
     */
    public void setGramlength(int gramLength) {
        this.gramLength = gramLength;
    }

    /**
     * Enables or disables boundary grams. The gram total is only recomputed by the next
     * {@link #reset(byte[], int, int)}.
     *
     * @param usePrePost whether to emit grams overlapping the string boundaries
     */
    public void setPrePost(boolean usePrePost) {
        this.usePrePost = usePrePost;
    }

    /**
     * @return the gram total computed by the last reset, narrowed to {@code short}
     *         (values outside the {@code short} range are truncated by the cast)
     */
    @Override
    public short getTokensCount() {
        return (short) totalGrams;
    }

    /** @return {@link TokenizerInfo.TokenizerType#STRING} */
    @Override
    public TokenizerInfo.TokenizerType getTokenizerType() {
        return TokenizerInfo.TokenizerType.STRING;
    }

    /** @return {@link TokenizerCategory#NGRAM} */
    @Override
    public TokenizerCategory getTokenizerCategory() {
        return TokenizerCategory.NGRAM;
    }
}

