/*
 * Decompiled with CFR 0.152.
 */
package org.omegat.tokenizer;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.Locale;
import java.util.Map;
import org.apache.commons.lang.StringUtils;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.util.Version;
import org.omegat.core.Core;
import org.omegat.core.CoreEvents;
import org.omegat.core.data.IProject;
import org.omegat.core.data.SourceTextEntry;
import org.omegat.core.events.IProjectEventListener;
import org.omegat.gui.comments.ICommentProvider;
import org.omegat.tokenizer.ITokenizer;
import org.omegat.tokenizer.Tokenizer;
import org.omegat.tokenizer.WordIterator;
import org.omegat.util.Language;
import org.omegat.util.StringUtil;
import org.omegat.util.Token;

/**
 * Common base for Lucene-backed tokenizers.
 *
 * <p>Concrete subclasses supply the Lucene {@link TokenStream} through
 * {@link #getTokenStream(String, boolean, boolean)}; this class turns that stream into
 * {@link Token} arrays or plain strings, filters tokens, and caches the results of
 * {@link #tokenizeWords(String, ITokenizer.StemmingMode)}.
 *
 * <p>The three token caches are static, i.e. shared by every tokenizer instance, and each
 * access is guarded by synchronizing on the cache map itself. They are cleared when a
 * project-close event is received.
 */
public abstract class BaseTokenizer implements ITokenizer {
    /** Cache of {@link #tokenizeWords} results for {@code StemmingMode.NONE}, keyed by input string. */
    private static final Map<String, Token[]> tokenCacheNone = new HashMap<String, Token[]>(5000);
    /** Cache of {@link #tokenizeWords} results for {@code StemmingMode.MATCHING}, keyed by input string. */
    private static final Map<String, Token[]> tokenCacheMatching = new HashMap<String, Token[]>(5000);
    /** Cache of {@link #tokenizeWords} results for {@code StemmingMode.GLOSSARY}, keyed by input string. */
    private static final Map<String, Token[]> tokenCacheGlossary = new HashMap<String, Token[]>(5000);

    /**
     * Every Lucene {@link Version} mapped to a display name, in {@code Version.values()} order.
     * Populated by the static initializer at the bottom of this class.
     */
    protected static final Map<Version, String> supportedBehaviors = new LinkedHashMap<Version, String>(Version.values().length);

    /** Shared empty result for the string-returning tokenize methods. */
    protected static final String[] EMPTY_STRING_LIST = new String[0];
    /** Shared empty result for the {@link Token}-returning tokenize methods. */
    protected static final Token[] EMPTY_TOKENS_LIST = new Token[0];
    /** Initial capacity of the lists used to collect tokens. */
    protected static final int DEFAULT_TOKENS_COUNT = 64;

    /**
     * When {@code true}, the verbatim tokenize methods split the text with a
     * {@link WordIterator}; when {@code false} they go through the Lucene token stream with
     * every filter switched off.
     */
    protected boolean shouldDelegateTokenizeExactly = true;
    /** Behavior reported by {@link #getBehavior()} while no explicit behavior has been set. */
    protected Version defaultBehavior = Version.LUCENE_36;
    /** Behavior set through {@link #setBehavior(Version)}; {@code null} means "use the default". */
    protected Version currentBehavior = null;

    /**
     * Comment provider that returns the {@link #test(String...)} report of the current project's
     * source tokenizer for an entry's source text.
     *
     * <p>The source tokenizer is cast to {@code BaseTokenizer} unconditionally, so this fails with
     * a {@link ClassCastException} if the project uses a tokenizer that does not extend this class.
     */
    public static ICommentProvider TOKENIZER_DEBUG_PROVIDER = new ICommentProvider() {
        @Override
        public String getComment(SourceTextEntry newEntry) {
            return ((BaseTokenizer) Core.getProject().getSourceTokenizer()).test(newEntry.getSrcText());
        }
    };

    /**
     * Registers a project-change listener that empties all three static token caches when a
     * project is closed.
     *
     * <p>NOTE(review): one listener is registered per tokenizer instance and nothing in this
     * class unregisters it, so each instance stays reachable from the listener registry.
     * Confirm this is acceptable for the way tokenizers are instantiated.
     */
    public BaseTokenizer() {
        CoreEvents.registerProjectChangeListener(new IProjectEventListener() {
            @Override
            public void onProjectChanged(IProjectEventListener.PROJECT_CHANGE_TYPE eventType) {
                if (eventType != IProjectEventListener.PROJECT_CHANGE_TYPE.CLOSE) {
                    return;
                }
                // Same monitors as the readers/writers in tokenizeWords().
                synchronized (tokenCacheNone) {
                    tokenCacheNone.clear();
                }
                synchronized (tokenCacheMatching) {
                    tokenCacheMatching.clear();
                }
                synchronized (tokenCacheGlossary) {
                    tokenCacheGlossary.clear();
                }
            }
        });
    }

    /**
     * Returns the shared map of Lucene versions to display names.
     *
     * @return the static, mutable map itself (not a copy)
     */
    @Override
    public Map<Version, String> getSupportedBehaviors() {
        return supportedBehaviors;
    }

    /**
     * Returns the behavior currently in effect.
     *
     * @return the explicitly set behavior, or the default behavior if none was set
     */
    @Override
    public Version getBehavior() {
        return this.currentBehavior == null ? this.defaultBehavior : this.currentBehavior;
    }

    /**
     * Sets the behavior to use.
     *
     * @param behavior the Lucene version to use; {@code null} reverts to the default behavior
     */
    @Override
    public void setBehavior(Version behavior) {
        this.currentBehavior = behavior;
    }

    /**
     * Returns the default behavior of this tokenizer.
     *
     * @return the value of {@link #defaultBehavior}
     */
    @Override
    public Version getDefaultBehavior() {
        return this.defaultBehavior;
    }

    /**
     * Tokenizes a string into words, caching the result per stemming mode.
     *
     * <p>The flags passed to {@link #tokenize} are derived from the mode: stems are allowed for
     * {@code GLOSSARY} and {@code MATCHING}; the {@code stopWordsAllowed} flag is set only for
     * {@code MATCHING}; digit-containing tokens are filtered for every mode except
     * {@code GLOSSARY}; whitespace-only tokens are always filtered.
     *
     * <p>The returned array is the cached instance, so callers must not modify it. The lookup
     * and the store are separate synchronized sections; two threads may therefore tokenize the
     * same string concurrently, in which case the later store simply replaces the earlier one.
     *
     * @param strOrig      the text to tokenize
     * @param stemmingMode selects both the cache and the tokenization flags
     * @return the tokens found in {@code strOrig}
     * @throws RuntimeException if there is no cache for {@code stemmingMode}
     */
    @Override
    public Token[] tokenizeWords(String strOrig, ITokenizer.StemmingMode stemmingMode) {
        Map<String, Token[]> cache;
        switch (stemmingMode) {
            case NONE: {
                cache = tokenCacheNone;
                break;
            }
            case GLOSSARY: {
                cache = tokenCacheGlossary;
                break;
            }
            case MATCHING: {
                cache = tokenCacheMatching;
                break;
            }
            default: {
                throw new RuntimeException("No cache for specified stemming mode");
            }
        }

        Token[] result;
        synchronized (cache) {
            result = cache.get(strOrig);
        }
        if (result != null) {
            return result;
        }

        boolean stemsAllowed = stemmingMode == ITokenizer.StemmingMode.GLOSSARY
                || stemmingMode == ITokenizer.StemmingMode.MATCHING;
        boolean stopWordsAllowed = stemmingMode == ITokenizer.StemmingMode.MATCHING;
        boolean filterDigits = stemmingMode != ITokenizer.StemmingMode.GLOSSARY;
        // Tokenize outside the lock so a slow analysis does not block other threads.
        result = this.tokenize(strOrig, stemsAllowed, stopWordsAllowed, filterDigits, true);

        synchronized (cache) {
            cache.put(strOrig, result);
        }
        return result;
    }

    /**
     * Tokenizes a string into words and returns them as strings. Uses the same mode-to-flag
     * mapping as {@link #tokenizeWords(String, ITokenizer.StemmingMode)} but is not cached.
     *
     * @param str          the text to tokenize
     * @param stemmingMode selects the tokenization flags
     * @return the token strings; see {@link #tokenizeToStrings} for details
     */
    @Override
    public String[] tokenizeWordsToStrings(String str, ITokenizer.StemmingMode stemmingMode) {
        boolean stemsAllowed = stemmingMode == ITokenizer.StemmingMode.GLOSSARY
                || stemmingMode == ITokenizer.StemmingMode.MATCHING;
        boolean stopWordsAllowed = stemmingMode == ITokenizer.StemmingMode.MATCHING;
        boolean filterDigits = stemmingMode != ITokenizer.StemmingMode.GLOSSARY;
        return this.tokenizeToStrings(str, stemsAllowed, stopWordsAllowed, filterDigits, true);
    }

    /**
     * Splits a string into consecutive tokens without stemming or filtering.
     *
     * <p>If {@link #shouldDelegateTokenizeExactly} is {@code false}, the Lucene token stream is
     * used with all flags off. Otherwise the text is cut at every boundary reported by a
     * {@link WordIterator}, so each token starts where the previous one ended.
     *
     * @param strOrig the text to tokenize
     * @return the tokens, or {@link #EMPTY_TOKENS_LIST} for empty input
     */
    @Override
    public Token[] tokenizeVerbatim(String strOrig) {
        if (StringUtil.isEmpty(strOrig)) {
            return EMPTY_TOKENS_LIST;
        }
        if (!this.shouldDelegateTokenizeExactly) {
            return this.tokenize(strOrig, false, false, false, false);
        }
        ArrayList<Token> result = new ArrayList<Token>(DEFAULT_TOKENS_COUNT);
        WordIterator iterator = new WordIterator();
        iterator.setText(strOrig);
        int start = iterator.first();
        // -1 is treated as the end-of-iteration marker.
        for (int end = iterator.next(); end != -1; end = iterator.next()) {
            result.add(new Token(strOrig.substring(start, end), start));
            start = end;
        }
        return result.toArray(new Token[result.size()]);
    }

    /**
     * String-returning counterpart of {@link #tokenizeVerbatim(String)}.
     *
     * @param str the text to tokenize
     * @return the token strings, or {@link #EMPTY_STRING_LIST} for empty input
     */
    @Override
    public String[] tokenizeVerbatimToStrings(String str) {
        if (StringUtil.isEmpty(str)) {
            return EMPTY_STRING_LIST;
        }
        if (!this.shouldDelegateTokenizeExactly) {
            return this.tokenizeToStrings(str, false, false, false, false);
        }
        ArrayList<String> result = new ArrayList<String>(DEFAULT_TOKENS_COUNT);
        WordIterator iterator = new WordIterator();
        iterator.setText(str);
        int start = iterator.first();
        // -1 is treated as the end-of-iteration marker.
        for (int end = iterator.next(); end != -1; end = iterator.next()) {
            result.add(str.substring(start, end));
            start = end;
        }
        return result.toArray(new String[result.size()]);
    }

    /**
     * Produces one token per Unicode code point (a surrogate pair yields a single token).
     *
     * @param strOrig the text to split; must not be {@code null}
     * @return one token per code point, each carrying its {@code char} offset in {@code strOrig}
     */
    protected Token[] tokenizeByCodePoint(String strOrig) {
        Token[] tokens = new Token[strOrig.codePointCount(0, strOrig.length())];
        int tokenIndex = 0;
        int offset = 0;
        while (offset < strOrig.length()) {
            int cp = strOrig.codePointAt(offset);
            tokens[tokenIndex++] = new Token(String.valueOf(Character.toChars(cp)), offset);
            offset += Character.charCount(cp);
        }
        return tokens;
    }

    /**
     * String-returning counterpart of {@link #tokenizeByCodePoint(String)}.
     *
     * @param strOrig the text to split; must not be {@code null}
     * @return one string per code point
     */
    protected String[] tokenizeByCodePointToStrings(String strOrig) {
        String[] tokens = new String[strOrig.codePointCount(0, strOrig.length())];
        int tokenIndex = 0;
        int offset = 0;
        while (offset < strOrig.length()) {
            int cp = strOrig.codePointAt(offset);
            tokens[tokenIndex++] = String.valueOf(Character.toChars(cp));
            offset += Character.charCount(cp);
        }
        return tokens;
    }

    /**
     * Runs the subclass-provided token stream over a string and collects the accepted tokens.
     *
     * <p>An {@link IOException} from the stream is deliberately not propagated: the tokens
     * collected up to that point are returned. The stream is closed in all cases.
     *
     * @param strOrig          the text to tokenize
     * @param stemsAllowed     forwarded to {@link #getTokenStream}
     * @param stopWordsAllowed forwarded to {@link #getTokenStream}
     * @param filterDigits     drop tokens that contain at least one digit
     * @param filterWhitespace drop tokens that consist only of whitespace
     * @return tokens carrying the stream's term text, start offset and length
     *         (end offset minus start offset); {@link #EMPTY_TOKENS_LIST} for empty input
     */
    protected Token[] tokenize(String strOrig, boolean stemsAllowed, boolean stopWordsAllowed, boolean filterDigits, boolean filterWhitespace) {
        if (StringUtil.isEmpty(strOrig)) {
            return EMPTY_TOKENS_LIST;
        }
        ArrayList<Token> result = new ArrayList<Token>(DEFAULT_TOKENS_COUNT);
        TokenStream in = this.getTokenStream(strOrig, stemsAllowed, stopWordsAllowed);
        // addAttribute returns the existing attribute instance or registers a new one.
        CharTermAttribute cattr = in.addAttribute(CharTermAttribute.class);
        OffsetAttribute off = in.addAttribute(OffsetAttribute.class);
        try {
            in.reset();
            while (in.incrementToken()) {
                String tokenText = cattr.toString();
                if (this.acceptToken(tokenText, filterDigits, filterWhitespace)) {
                    result.add(new Token(tokenText, off.startOffset(), off.endOffset() - off.startOffset()));
                }
            }
            in.end();
        } catch (IOException ignored) {
            // Best effort: keep whatever was tokenized before the failure.
        } finally {
            closeQuietly(in);
        }
        return result.toArray(new Token[result.size()]);
    }

    /**
     * Runs the subclass-provided token stream over a string and collects the accepted token
     * texts.
     *
     * <p>When {@code stemsAllowed} is set and a token's text differs (ignoring case, using the
     * tokenizer language's locale) from the slice of the input it was produced from, that
     * original slice is added right after the token text. The result may therefore contain
     * more entries than there are tokens.
     *
     * <p>An {@link IOException} from the stream is deliberately not propagated: the strings
     * collected up to that point are returned. The stream is closed in all cases.
     *
     * @param str              the text to tokenize
     * @param stemsAllowed     forwarded to {@link #getTokenStream}; also enables the
     *                         "add original form" behavior described above
     * @param stopWordsAllowed forwarded to {@link #getTokenStream}
     * @param filterDigits     drop tokens that contain at least one digit
     * @param filterWhitespace drop tokens that consist only of whitespace
     * @return the collected strings; {@link #EMPTY_STRING_LIST} for empty input
     */
    protected String[] tokenizeToStrings(String str, boolean stemsAllowed, boolean stopWordsAllowed, boolean filterDigits, boolean filterWhitespace) {
        if (StringUtil.isEmpty(str)) {
            return EMPTY_STRING_LIST;
        }
        ArrayList<String> result = new ArrayList<String>(DEFAULT_TOKENS_COUNT);
        TokenStream in = this.getTokenStream(str, stemsAllowed, stopWordsAllowed);
        // addAttribute returns the existing attribute instance or registers a new one.
        CharTermAttribute cattr = in.addAttribute(CharTermAttribute.class);
        OffsetAttribute off = in.addAttribute(OffsetAttribute.class);
        // The locale is only needed (and only resolved) when stems are compared below.
        Locale loc = stemsAllowed ? this.getLanguage().getLocale() : null;
        try {
            in.reset();
            while (in.incrementToken()) {
                String tokenText = cattr.toString();
                if (!this.acceptToken(tokenText, filterDigits, filterWhitespace)) {
                    continue;
                }
                result.add(tokenText);
                if (stemsAllowed) {
                    String origText = str.substring(off.startOffset(), off.endOffset());
                    if (!origText.toLowerCase(loc).equals(tokenText.toLowerCase(loc))) {
                        result.add(origText);
                    }
                }
            }
            in.end();
        } catch (IOException ignored) {
            // Best effort: keep whatever was tokenized before the failure.
        } finally {
            closeQuietly(in);
        }
        return result.toArray(new String[result.size()]);
    }

    /** Closes a token stream, ignoring an {@link IOException} raised by the close itself. */
    private static void closeQuietly(TokenStream stream) {
        try {
            stream.close();
        } catch (IOException ignored) {
            // Nothing useful can be done if closing fails; the tokens are already collected.
        }
    }

    /**
     * Decides whether a token passes the digit and whitespace filters.
     *
     * @param token            the token text
     * @param filterDigits     reject the token if any of its code points is a digit
     * @param filterWhitespace reject the token if all of its code points are whitespace
     * @return {@code false} for an empty token or one rejected by an active filter
     */
    private boolean acceptToken(String token, boolean filterDigits, boolean filterWhitespace) {
        if (StringUtil.isEmpty(token)) {
            return false;
        }
        if (!filterDigits && !filterWhitespace) {
            return true;
        }
        boolean hasNonWhitespace = false;
        int offset = 0;
        while (offset < token.length()) {
            int cp = token.codePointAt(offset);
            if (filterDigits && Character.isDigit(cp)) {
                return false;
            }
            if (filterWhitespace && !StringUtil.isWhiteSpace(cp)) {
                hasNonWhitespace = true;
            }
            offset += Character.charCount(cp);
        }
        return !filterWhitespace || hasNonWhitespace;
    }

    /**
     * Creates the Lucene token stream for the given text.
     *
     * @param strOrig          the text to analyze
     * @param stemsAllowed     flag supplied by the tokenize methods; presumably enables
     *                         stemming in the implementation
     * @param stopWordsAllowed flag supplied by the tokenize methods; NOTE(review): whether
     *                         {@code true} keeps or removes stop words is decided by the
     *                         implementation and cannot be confirmed from this class
     * @return a token stream over {@code strOrig}; the caller resets, consumes and closes it
     */
    protected abstract TokenStream getTokenStream(String strOrig, boolean stemsAllowed, boolean stopWordsAllowed);

    /**
     * Returns the languages declared by the {@link Tokenizer} annotation on the concrete class.
     *
     * @return the annotation's {@code languages()} value
     * @throws RuntimeException if the concrete class carries no {@link Tokenizer} annotation
     */
    @Override
    public String[] getSupportedLanguages() {
        Tokenizer ann = this.getClass().getAnnotation(Tokenizer.class);
        if (ann == null) {
            throw new RuntimeException(this.getClass().getName() + " must have a " + Tokenizer.class.getName() + " annotation available at runtime.");
        }
        return ann.languages();
    }

    /**
     * Determines the language this tokenizer works on.
     *
     * <p>If the annotation declares at least one language and the first is not the
     * {@code "discoverAtRuntime"} marker, that first language is used. Otherwise the language
     * is taken from the current project: the source language if this instance is the project's
     * source tokenizer, the target language if it is the target tokenizer.
     *
     * @return the tokenizer's language
     * @throws RuntimeException if the language must be discovered at runtime but there is no
     *                          project, or this instance is not one of the project's tokenizers
     */
    protected Language getLanguage() {
        String[] languages = this.getSupportedLanguages();
        // Compare by value: reference equality on strings only works for interned instances.
        boolean discoverAtRuntime = languages.length == 0 || "discoverAtRuntime".equals(languages[0]);
        if (!discoverAtRuntime) {
            return new Language(languages[0]);
        }
        IProject proj = Core.getProject();
        if (proj == null) {
            throw new RuntimeException("This tokenizer's language can only be determined in the context of a project, but project is null.");
        }
        // Identity comparison is intended here: we are looking for this very instance.
        if (proj.getSourceTokenizer() == this) {
            return proj.getProjectProperties().getSourceLanguage();
        }
        if (proj.getTargetTokenizer() == this) {
            return proj.getProjectProperties().getTargetLanguage();
        }
        throw new RuntimeException("This tokenizer's language can only be determined in the context of a project, but is not assigned to current project.");
    }

    /**
     * Builds a human-readable report showing how each input is tokenized under several flag
     * combinations. Used by {@link #TOKENIZER_DEBUG_PROVIDER}.
     *
     * <p>NOTE(review): the section labelled {@code =tokenizeWords(GLOSSARY)} calls
     * {@code tokenize} with {@code filterDigits = true}, whereas
     * {@link #tokenizeWords(String, ITokenizer.StemmingMode)} passes {@code false} for
     * {@code GLOSSARY}. One of the two is probably unintended; left unchanged here.
     *
     * @param args the input strings to report on
     * @return the report text, starting with the concrete class name
     */
    protected String test(String ... args) {
        StringBuilder sb = new StringBuilder();
        sb.append(this.getClass().getName()).append('\n');
        for (String input : args) {
            sb.append("Input:\n");
            sb.append(input).append("\n");
            sb.append("tokenizeVerbatim:\n");
            sb.append(this.printTest(this.tokenizeVerbatim(input), input));
            sb.append("tokenize:\n");
            sb.append(this.printTest(this.tokenize(input, false, false, false, true), input));
            sb.append("tokenize (stemsAllowed):\n");
            sb.append(this.printTest(this.tokenize(input, true, false, false, true), input));
            sb.append("tokenize (stemsAllowed stopWordsAllowed):\n");
            sb.append(this.printTest(this.tokenize(input, true, true, false, true), input));
            sb.append("tokenize (stemsAllowed stopWordsAllowed filterDigits) (=tokenizeWords(MATCHING)):\n");
            sb.append(this.printTest(this.tokenize(input, true, true, true, true), input));
            sb.append("tokenize (stemsAllowed filterDigits) (=tokenizeWords(GLOSSARY)):\n");
            sb.append(this.printTest(this.tokenize(input, true, false, true, true), input));
            sb.append("tokenize (filterDigits) (=tokenizeWords(NONE)):\n");
            sb.append(this.printTest(this.tokenize(input, false, false, true, true), input));
            sb.append("----------------------------------\n");
        }
        return sb.toString();
    }

    /**
     * Formats one section of the {@link #test(String...)} report.
     *
     * @param tokens the tokens produced for {@code input}
     * @param input  the original text
     * @return the token texts joined by {@code ", "}, followed by a line stating whether
     *         concatenating them reproduces {@code input} exactly
     */
    protected String printTest(Token[] tokens, String input) {
        Object[] strings = Token.getTextsFromString(tokens, input);
        boolean isVerbatim = StringUtils.join(strings, "").equals(input);
        StringBuilder sb = new StringBuilder();
        sb.append(StringUtils.join(strings, ", ")).append('\n');
        sb.append("Is verbatim: ").append(isVerbatim).append('\n');
        return sb.toString();
    }

    /*
     * Fills supportedBehaviors with a display name per Lucene version: the first character of
     * the constant name is kept, the rest is lower-cased with '_' turned into ' ', and if the
     * name ends in a digit a '.' is inserted before that last digit
     * (e.g. "LUCENE_36" becomes "Lucene 3.6").
     */
    static {
        for (Version v : Version.values()) {
            String constantName = v.toString();
            int restStart = constantName.offsetByCodePoints(0, 1);
            StringBuilder label = new StringBuilder();
            label.appendCodePoint(constantName.codePointAt(0));
            // Fixed locale so the label does not depend on the JVM's default locale.
            label.append(constantName.substring(restStart).toLowerCase(Locale.ENGLISH).replace('_', ' '));
            int lastCodePointOffset = label.offsetByCodePoints(label.length(), -1);
            if (Character.isDigit(label.codePointAt(lastCodePointOffset))) {
                label.insert(lastCodePointOffset, '.');
            }
            supportedBehaviors.put(v, label.toString());
        }
    }
}

