package org.basex.util.ft;

import java.util.Arrays;
import java.util.Collection;
import java.util.HashSet;
import org.basex.util.Token;
import org.basex.util.TokenBuilder;
import org.basex.util.Util;
import org.basex.util.list.IntList;

/* loaded from: input_file:org/basex/util/ft/WesternTokenizer.class */
public final class WesternTokenizer extends Tokenizer {
    /** Languages this tokenizer reports via {@link #languages()}; filled by the static initializer. */
    private static final HashSet<Language> SUPPORTED = new HashSet<>();
    /** Set from {@code FTFlag.DC}; when false, {@link #get()} passes non-ASCII tokens through {@code dia}. */
    private final boolean dc;
    /** Set from {@code FTFlag.CS}; when false, {@link #get()} lower-cases the token. */
    private final boolean cs;
    /** Set from {@code FTFlag.UC}; when true, {@link #get()} upper-cases the token. */
    private final boolean uc;
    /** Set from {@code FTFlag.LC}; when true, {@link #get()} lower-cases the token. */
    private final boolean lc;
    /** Set from {@code FTFlag.WC}; when true, {@code more()} treats backslash and '.' specially. */
    private final boolean wc;
    /** Raised by the scanners when a line feed was crossed; returned by {@link #paragraph()}. */
    private boolean pa;
    /** Running count of sentence terminators ('.', '!', '?') counted by {@code more()}. */
    private int sent;
    /** Running count of line feeds counted by {@code more()}. */
    private int para;
    /** Code point of the most recent sentence terminator counted by {@code more()}. */
    private int pm;
    /** Value of {@link #cpos} on entry to the most recent scanner call. */
    private int lp;
    /** Byte offset in {@link #text} at which the current token starts. */
    private int spos;
    /** Current byte offset of the scanner in {@link #text}. */
    private int cpos;
    /** Raised by {@code moreSC()} when non-token characters were skipped; passed on to {@code FTSpan}. */
    private boolean sc;
    /** Look-ahead counter: positive while a scanned token has not yet been consumed. */
    private int next;
    /** Per-token values of {@link #sent}, filled lazily by {@code pos(int, FTUnit)}. */
    private final IntList sen = new IntList();
    /** Per-token values of {@link #para}, filled lazily by {@code pos(int, FTUnit)}. */
    private final IntList par = new IntList();
    /** Input currently being tokenized. */
    private byte[] text = Token.EMPTY;
    /** Index of the current token; -1 before the first token has been scanned. */
    private int pos = -1;

    static {
        String[] strArr = {"ar", "ja", "ko", "th", "zh"};
        for (Language language : Language.ALL.values()) {
            if (!Token.eq(language.code(), strArr)) {
                SUPPORTED.add(language);
            }
        }
    }

    public WesternTokenizer(FTOpt fTOpt) {
        this.lc = fTOpt != null && fTOpt.is(FTFlag.LC);
        this.uc = fTOpt != null && fTOpt.is(FTFlag.UC);
        this.cs = fTOpt != null && fTOpt.is(FTFlag.CS);
        this.wc = fTOpt != null && fTOpt.is(FTFlag.WC);
        this.dc = fTOpt != null && fTOpt.is(FTFlag.DC);
    }

    /**
     * Returns the languages handled by this tokenizer.
     *
     * @return the shared {@code SUPPORTED} set itself (not a copy)
     */
    /* JADX INFO: Access modifiers changed from: package-private */
    @Override // org.basex.util.ft.LanguageImpl
    public Collection<Language> languages() {
        return SUPPORTED;
    }

    /**
     * Factory method: creates a new tokenizer instance for the given options.
     *
     * @param fTOpt full-text options forwarded to the constructor
     * @return a fresh {@code WesternTokenizer}
     */
    /* JADX INFO: Access modifiers changed from: package-private */
    @Override // org.basex.util.ft.Tokenizer
    public Tokenizer get(FTOpt fTOpt) {
        return new WesternTokenizer(fTOpt);
    }

    /**
     * Assigns the text to be tokenized and rewinds the scanner.
     * The cached sentence/paragraph lists are discarded only when a different
     * array instance (reference comparison) is supplied.
     *
     * @param bArr text to tokenize
     * @return this tokenizer
     */
    @Override // org.basex.util.ft.FTIterator
    public WesternTokenizer init(byte[] bArr) {
        final boolean sameInput = text == bArr;
        if (!sameInput) {
            text = bArr;
            sen.reset();
            par.reset();
        }
        init();
        return this;
    }

    /** Rewinds the scanner state so that iteration starts again at the beginning of the text. */
    private void init() {
        // position state
        pos = -1;
        cpos = 0;
        next = 0;
        // sentence and paragraph counters
        sent = 0;
        para = 0;
    }

    /**
     * Checks whether another token is available, scanning ahead if nothing is buffered.
     *
     * @return {@code true} if the look-ahead counter is positive after the call
     */
    @Override // java.util.Iterator
    public boolean hasNext() {
        if (next > 0) {
            return true;
        }
        // nothing buffered: scan with the scanner matching the current mode
        final boolean found = special ? moreSC() : more();
        if (found) {
            next++;
        }
        return next > 0;
    }

    /**
     * Returns the next token wrapped in a span.
     *
     * @return span built from the token bytes, the current token index and the
     *         special-character flag
     */
    @Override // java.util.Iterator
    public FTSpan next() {
        // fetch the token first: doing so updates pos and sc
        final byte[] token = nextToken();
        return new FTSpan(token, pos, sc);
    }

    /**
     * Consumes one token and returns its bytes.
     *
     * @return the token produced by {@code getSC()} in special mode, otherwise by {@code get()}
     */
    @Override // org.basex.util.ft.FTIterator
    public byte[] nextToken() {
        next--;
        if (next < 0) {
            // nothing was buffered by a prior hasNext() call: scan now
            hasNext();
        }
        if (special) {
            return getSC();
        }
        return get();
    }

    /**
     * Advances the scanner to the next token.
     *
     * <p>Phase 1 skips all characters for which {@code Token.ftChar} is false,
     * counting the first sentence terminator ('.', '!', '?') and the first line
     * feed that are crossed. Phase 2 consumes characters until the token ends.
     * Afterwards {@code spos} and {@code cpos} delimit the token.</p>
     *
     * <p>When the {@code wc} flag is set, a backslash escapes the following
     * character, and an unescaped '.' starts or continues a token; a directly
     * following '?', '*', '+' or a '{'...'}' group is kept as part of the token.</p>
     *
     * <p>Both loops advance {@code cpos} in the loop update clause so that every
     * {@code continue} moves on to the next character.</p>
     *
     * @return {@code false} if the end of the text was reached before a token started
     */
    private boolean more() {
        final int length = text.length;
        pos++;
        lp = cpos;

        boolean sentenceCounted = false;
        boolean escaped = false;
        pa = false;

        // phase 1: skip everything in front of the next token
        for (; cpos < length; cpos += Token.cl(text, cpos)) {
            final int cp = Token.cp(text, cpos);
            if (wc && !escaped) {
                escaped = cp == '\\';
                if (escaped) {
                    continue;
                }
                // an unescaped dot is a wildcard and therefore starts a token
                if (cp == '.') {
                    break;
                }
            }
            if (!sentenceCounted && (cp == '.' || cp == '!' || cp == '?')) {
                sentenceCounted = true;
                sent++;
                pm = cp;
            } else if (!pa && cp == '\n') {
                pa = true;
                para++;
            } else if (Token.ftChar(cp)) {
                if (escaped) {
                    // step back so that the token starts at the escaping backslash
                    cpos--;
                    escaped = false;
                }
                break;
            }
            escaped = false;
        }

        spos = cpos;
        if (cpos == length) {
            return false;
        }

        // phase 2: consume the token
        for (; cpos < length; cpos += Token.cl(text, cpos)) {
            final int cp = Token.cp(text, cpos);
            if (wc && !escaped) {
                escaped = cp == '\\';
                if (escaped) {
                    continue;
                }
                if (cp == '.') {
                    // look at the byte after the dot to detect a quantifier
                    final int quantifier = cpos + 1 < length ? text[cpos + 1] : 0;
                    if (quantifier == '?' || quantifier == '*' || quantifier == '+') {
                        cpos++;
                    } else if (quantifier == '{') {
                        // skip to the closing brace (or to the end of the text)
                        while (++cpos < length && text[cpos] != '}') {
                            // scanning only
                        }
                        if (cpos == length) {
                            break;
                        }
                    }
                    continue;
                }
            }
            if (!Token.ftChar(cp)) {
                if (escaped) {
                    // exclude the backslash that preceded this non-token character
                    cpos--;
                }
                break;
            }
            escaped = false;
        }
        return true;
    }

    /**
     * Returns the current token after applying the configured normalizations:
     * {@code dia} for non-ASCII tokens unless {@code dc} is set, then upper-casing
     * if {@code uc} is set, then lower-casing if {@code lc} is set or {@code cs} is not.
     *
     * @return the normalized token
     */
    private byte[] get() {
        byte[] token = orig();
        // computed once on the raw token and reused by all later steps
        final boolean ascii = Token.ascii(token);
        final boolean normalize = !dc && !ascii;
        if (normalize) {
            token = dia(token);
        }
        if (uc) {
            token = upper(token, ascii);
        }
        final boolean toLower = lc || !cs;
        return toLower ? lower(token, ascii) : token;
    }

    /**
     * Copies the bytes of the current token out of the text.
     *
     * @return a new array holding the bytes from {@code spos} (inclusive) to {@code cpos} (exclusive)
     */
    private byte[] orig() {
        final int size = cpos - spos;
        final byte[] token = new byte[size];
        System.arraycopy(text, spos, token, 0, size);
        return token;
    }

    /**
     * Scanner used in special mode, in which runs of non-token characters are
     * reported as well.
     *
     * <p>First, characters for which {@code Token.ftChar} is false are consumed,
     * stopping right after a line feed. If anything was consumed, the call returns
     * immediately. Otherwise the token index is incremented and token characters
     * are consumed up to the next non-token character.</p>
     *
     * @return {@code false} if the end of the text was reached without consuming anything
     */
    private boolean moreSC() {
        final int end = text.length;
        pa = false;
        sc = false;
        lp = cpos;

        // consume non-token characters
        for (; cpos < end; cpos += Token.cl(text, cpos)) {
            final int cp = Token.cp(text, cpos);
            if (cp == '\n') {
                // a line feed terminates the run and is included in it
                pa = true;
                sc = true;
                cpos++;
                break;
            }
            if (Token.ftChar(cp)) {
                break;
            }
            sc = true;
        }

        // non-token characters were consumed: report them
        if (cpos > lp) {
            return true;
        }

        pos++;
        spos = cpos;
        if (cpos == end) {
            return false;
        }

        // consume token characters
        for (; cpos < end; cpos += Token.cl(text, cpos)) {
            if (!Token.ftChar(Token.cp(text, cpos))) {
                spos = cpos - Token.cl(text, cpos);
                break;
            }
        }
        return true;
    }

    /**
     * Returns the bytes found by the last {@code moreSC()} call.
     *
     * @return the range from {@code lp} to {@code cpos} if the scanner advanced,
     *         otherwise the range from {@code cpos} to {@code spos}
     */
    private byte[] getSC() {
        if (lp < cpos) {
            return Arrays.copyOfRange(text, lp, cpos);
        }
        return Arrays.copyOfRange(text, cpos, spos);
    }

    /**
     * Maps a word position to a position in the given unit.
     *
     * <p>For sentences and paragraphs the whole text is scanned once (which
     * rewinds the scanner) and the counter values per token are cached.</p>
     *
     * @param i word position
     * @param fTUnit requested unit
     * @return {@code i} itself for words, otherwise the cached counter for token {@code i}
     */
    @Override // org.basex.util.ft.Tokenizer
    public int pos(int i, FTUnit fTUnit) {
        if (fTUnit == FTUnit.WORD) {
            return i;
        }
        // lazily fill both caches in a single pass over the text
        if (sen.size() == 0) {
            init();
            while (more()) {
                sen.add(sent);
                par.add(para);
            }
        }
        final IntList positions = fTUnit == FTUnit.SENTENCE ? sen : par;
        return positions.get(i);
    }

    /**
     * Replaces every code point of the token by its {@code Token.norm} value.
     * The input array is returned as is if no code point is changed by {@code Token.norm}.
     *
     * @param bArr token
     * @return the input array, or a newly built array if at least one code point differs
     */
    private static byte[] dia(byte[] bArr) {
        final int size = bArr.length;
        for (int offset = 0; offset < size; offset += Token.cl(bArr, offset)) {
            final int cp = Token.cp(bArr, offset);
            if (cp == Token.norm(cp)) {
                continue;
            }
            // first differing code point: copy the untouched prefix, normalize the rest
            final TokenBuilder result = new TokenBuilder();
            result.add(bArr, 0, offset);
            for (int rest = offset; rest < size; rest += Token.cl(bArr, rest)) {
                result.add(Token.norm(Token.cp(bArr, rest)));
            }
            return result.finish();
        }
        return bArr;
    }

    /**
     * Converts a token to upper case.
     *
     * @param bArr token; modified in place when {@code z} is true
     * @param z whether the token is to be treated as pure ASCII (byte-wise conversion)
     * @return the same array in the ASCII case, otherwise a newly built array
     */
    private static byte[] upper(byte[] bArr, boolean z) {
        final int size = bArr.length;
        if (!z) {
            // convert code point by code point
            final TokenBuilder result = new TokenBuilder();
            for (int offset = 0; offset < size; offset += Token.cl(bArr, offset)) {
                result.add(Token.uc(Token.cp(bArr, offset)));
            }
            return result.finish();
        }
        int index = 0;
        while (index < size) {
            bArr[index] = (byte) Token.uc(bArr[index]);
            index++;
        }
        return bArr;
    }

    /**
     * Converts a token to lower case.
     *
     * @param bArr token; modified in place when {@code z} is true
     * @param z whether the token is to be treated as pure ASCII (byte-wise conversion)
     * @return the same array in the ASCII case, otherwise a newly built array
     */
    private static byte[] lower(byte[] bArr, boolean z) {
        final int size = bArr.length;
        if (!z) {
            // convert code point by code point
            final TokenBuilder result = new TokenBuilder();
            for (int offset = 0; offset < size; offset += Token.cl(bArr, offset)) {
                result.add(Token.lc(Token.cp(bArr, offset)));
            }
            return result.finish();
        }
        int index = 0;
        while (index < size) {
            bArr[index] = (byte) Token.lc(bArr[index]);
            index++;
        }
        return bArr;
    }

    /**
     * Rewinds the scanner, tokenizes the whole text and collects statistics.
     *
     * <p>The result consists of five arrays:</p>
     * <ol start="0">
     *   <li>byte length of each token</li>
     *   <li>accumulated token lengths per sentence; the last entry is the remaining
     *       accumulated length plus one</li>
     *   <li>accumulated token lengths per paragraph</li>
     *   <li>the bytes of all tokens, one entry per byte</li>
     *   <li>the value of {@code pm} recorded whenever a sentence length is stored</li>
     * </ol>
     *
     * @return the five arrays described above
     */
    @Override // org.basex.util.ft.Tokenizer
    public int[][] info() {
        init();
        final IntList tokenLengths = new IntList();
        final IntList sentenceLengths = new IntList();
        final IntList paragraphLengths = new IntList();
        final IntList tokenBytes = new IntList();
        final IntList marks = new IntList();

        int lastSent = 0;
        int lastPara = 0;
        int sentenceLength = 0;
        int paragraphLength = 0;
        while (more()) {
            final byte[] token = orig();
            final int length = token.length;
            tokenLengths.add(length);
            for (final byte b : token) {
                tokenBytes.add(b);
            }

            // the sentence counter moved on: close the previous sentence
            if (sent != lastSent) {
                if (sentenceLength > 0) {
                    sentenceLengths.add(sentenceLength);
                    marks.add(pm);
                }
                lastSent = sent;
                sentenceLength = 0;
            }
            // the paragraph counter moved on: close the previous paragraph
            if (para != lastPara) {
                if (paragraphLength > 0) {
                    paragraphLengths.add(paragraphLength);
                }
                lastPara = para;
                paragraphLength = 0;
            }
            sentenceLength += length;
            paragraphLength += length;
        }

        if (sent != lastSent && sentenceLength > 0) {
            sentenceLengths.add(sentenceLength);
            marks.add(pm);
        }
        if (paragraphLength > 0) {
            paragraphLengths.add(paragraphLength);
        }
        // trailing entry, always appended
        sentenceLengths.add(sentenceLength + 1);

        // the result is an array of arrays, hence int[][]
        return new int[][] {
            tokenLengths.toArray(),
            sentenceLengths.toArray(),
            paragraphLengths.toArray(),
            tokenBytes.toArray(),
            marks.toArray()
        };
    }

    /**
     * Returns the precedence value of this implementation.
     *
     * @return the constant 10
     */
    @Override // org.basex.util.ft.LanguageImpl
    protected byte prec() {
        final byte precedence = 10;
        return precedence;
    }

    /**
     * Returns the paragraph flag set by the most recent scanner call.
     *
     * @return {@code true} if the last scan crossed a line feed
     */
    /* JADX INFO: Access modifiers changed from: package-private */
    @Override // org.basex.util.ft.Tokenizer
    public boolean paragraph() {
        return this.pa;
    }

    /**
     * Returns a string representation of this tokenizer.
     *
     * @return the name supplied by {@code Util.name}, followed by the text in square brackets
     */
    @Override
    public String toString() {
        final StringBuilder sb = new StringBuilder();
        sb.append(Util.name(this));
        sb.append('[').append(Token.string(text)).append(']');
        return sb.toString();
    }

    /**
     * Delegates unchanged to the superclass implementation.
     * NOTE(review): the bridge/synthetic markers suggest this is a compiler-generated
     * method surfaced by the decompiler - confirm before keeping it in hand-written source.
     *
     * @param language language to check
     * @return result of {@code super.supports(language)}
     */
    @Override // org.basex.util.ft.Tokenizer, org.basex.util.ft.LanguageImpl
    public /* bridge */ /* synthetic */ boolean supports(Language language) {
        return super.supports(language);
    }

    /**
     * Delegates unchanged to the superclass implementation.
     * NOTE(review): the bridge/synthetic markers suggest this is a compiler-generated
     * method surfaced by the decompiler - confirm before keeping it in hand-written source.
     *
     * @return result of {@code super.hashCode()}
     */
    @Override // org.basex.util.ft.Tokenizer, org.basex.util.ft.LanguageImpl
    public /* bridge */ /* synthetic */ int hashCode() {
        return super.hashCode();
    }
}
