/*
 * Decompiled with CFR 0.152.
 */
package org.omegat.tokenizer;

import java.io.IOException;
import java.io.StringReader;
import java.util.Collections;
import java.util.Set;
import net.moraleboost.io.BasicCodePointReader;
import net.moraleboost.io.CodePointReader;
import net.moraleboost.tinysegmenter.TinySegmenter;
import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.cjk.CJKAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.omegat.tokenizer.BaseTokenizer;
import org.omegat.tokenizer.Tokenizer;

/**
 * Japanese tokenizer that segments text with {@link TinySegmenter} and exposes
 * the result as a Lucene {@link TokenStream}.
 *
 * <p>Registered for the {@code "ja"} language through the {@link Tokenizer}
 * annotation.
 */
@Tokenizer(languages={"ja"})
public class TinySegmenterJapaneseTokenizer
extends BaseTokenizer {
    /**
     * Creates the tokenizer and switches off the inherited
     * {@code shouldDelegateTokenizeExactly} flag.
     */
    public TinySegmenterJapaneseTokenizer() {
        // NOTE(review): presumably this makes BaseTokenizer use its own
        // exact-tokenization path instead of getTokenStream(); confirm against
        // BaseTokenizer.
        this.shouldDelegateTokenizeExactly = false;
    }

    /**
     * Builds a token stream over {@code strOrig} backed by TinySegmenter.
     *
     * @param strOrig          text to tokenize
     * @param stemsAllowed     when {@code true} the segmenter output is wrapped
     *                         in a {@link StopFilter}; when {@code false} the raw
     *                         segmenter stream is returned
     * @param stopWordsAllowed only consulted when {@code stemsAllowed} is
     *                         {@code true}: {@code true} hands the filter
     *                         {@link CJKAnalyzer#getDefaultStopSet()},
     *                         {@code false} hands it an empty set
     * @return the (possibly filtered) token stream
     */
    @Override
    protected TokenStream getTokenStream(String strOrig, boolean stemsAllowed, boolean stopWordsAllowed) {
        TokenStream segmented = new TokenStreamWrapper(new BasicCodePointReader(new StringReader(strOrig)));
        if (!stemsAllowed) {
            return segmented;
        }
        // Wildcard element type: the stop set is only passed through to
        // StopFilter, and Set<?> accepts both the analyzer's default set and
        // the empty set without an unchecked conversion.
        Set<?> stopWords = stopWordsAllowed ? CJKAnalyzer.getDefaultStopSet() : Collections.emptySet();
        return new StopFilter(this.getBehavior(), segmented, stopWords);
    }

    /**
     * Minimal {@link TokenStream} adapter around a {@link TinySegmenter}.
     *
     * <p>Each call to {@link #incrementToken()} pulls one token from the
     * segmenter and publishes its text and offsets through the term and offset
     * attributes.
     */
    public static class TokenStreamWrapper
    extends TokenStream {
        private final TinySegmenter segmenter;
        private final CharTermAttribute termAttr;
        private final OffsetAttribute offAttr;

        /**
         * Creates a stream that segments the code points supplied by
         * {@code reader}.
         *
         * @param reader source of code points handed to the segmenter
         */
        public TokenStreamWrapper(CodePointReader reader) {
            this.segmenter = new TinySegmenter(reader);
            this.termAttr = this.addAttribute(CharTermAttribute.class);
            this.offAttr = this.addAttribute(OffsetAttribute.class);
        }

        /**
         * Advances to the next segmenter token.
         *
         * @return {@code true} if a token was produced, {@code false} once the
         *         segmenter returns {@code null}
         * @throws IOException declared by the overridden method; may be raised
         *         while the segmenter reads its input
         */
        @Override
        public boolean incrementToken() throws IOException {
            TinySegmenter.Token token = this.segmenter.next();
            if (token == null) {
                return false;
            }
            // Token producers are expected to reset all attributes before
            // filling in a new token, so that values written by downstream
            // filters (e.g. position increments) do not carry over.
            this.clearAttributes();
            this.termAttr.setEmpty();
            this.termAttr.append(token.str);
            // The segmenter's offsets are narrowed to int for the offset attribute.
            this.offAttr.setOffset((int)token.start, (int)token.end);
            return true;
        }
    }
}

