/*
 * Decompiled with CFR 0.152.
 */
package com.oracle.truffle.regex.tregex.parser.flavors;

import com.oracle.truffle.api.CompilerDirectives;
import com.oracle.truffle.regex.AbstractRegexObject;
import com.oracle.truffle.regex.RegexFlags;
import com.oracle.truffle.regex.RegexLanguage;
import com.oracle.truffle.regex.RegexSource;
import com.oracle.truffle.regex.RegexSyntaxException;
import com.oracle.truffle.regex.charset.CodePointSet;
import com.oracle.truffle.regex.charset.CodePointSetAccumulator;
import com.oracle.truffle.regex.charset.Constants;
import com.oracle.truffle.regex.errors.PyErrorMessages;
import com.oracle.truffle.regex.tregex.buffer.CompilationBuffer;
import com.oracle.truffle.regex.tregex.parser.RegexASTBuilder;
import com.oracle.truffle.regex.tregex.parser.RegexParser;
import com.oracle.truffle.regex.tregex.parser.Token;
import com.oracle.truffle.regex.tregex.parser.ast.Group;
import com.oracle.truffle.regex.tregex.parser.ast.LookBehindAssertion;
import com.oracle.truffle.regex.tregex.parser.ast.RegexAST;
import com.oracle.truffle.regex.tregex.parser.ast.RegexASTNode;
import com.oracle.truffle.regex.tregex.parser.ast.RegexASTRootNode;
import com.oracle.truffle.regex.tregex.parser.ast.RegexASTSubtreeRootNode;
import com.oracle.truffle.regex.tregex.parser.flavors.PythonFlags;
import com.oracle.truffle.regex.tregex.parser.flavors.PythonMethod;
import com.oracle.truffle.regex.tregex.parser.flavors.PythonREMode;
import com.oracle.truffle.regex.tregex.parser.flavors.PythonRegexLexer;
import java.util.ArrayList;
import java.util.EnumSet;

/**
 * Parser for Python-flavored regular expressions.
 *
 * <p>Tokens are pulled from a {@link PythonRegexLexer} and translated, one at a time, into calls on
 * a {@link RegexASTBuilder}, which assembles the resulting {@link RegexAST}. The builder is
 * configured with a fixed set of {@link RegexFlags} (see {@link #createECMAScriptFlags}); the
 * Python-specific flags are tracked by the lexer and consulted through {@link #getLocalFlags()} and
 * {@link #getFlags()}.
 */
public final class PythonRegexParser implements RegexParser {
    /** Token kinds that may directly precede a quantifier; anything else is "nothing to repeat". */
    private static final EnumSet<Token.Kind> QUANTIFIER_PREV = EnumSet.of(Token.Kind.literalChar, Token.Kind.charClass, Token.Kind.charClassEnd, Token.Kind.groupEnd, Token.Kind.backReference);
    /** Code point of the line feed character, used when expanding {@code ^} and {@code $}. */
    private static final int LINE_FEED = 10;

    private final PythonREMode mode;
    private final PythonRegexLexer lexer;
    private final RegexASTBuilder astBuilder;
    /** Scratch accumulator for the character class currently being assembled. */
    private final CodePointSetAccumulator curCharClass = new CodePointSetAccumulator();

    /**
     * Creates a parser for the given source.
     *
     * @param language the language instance handed to the AST builder
     * @param source the regex source; its encoding selects the {@link PythonREMode} and its options
     *            select the Python method being emulated
     * @param compilationBuffer buffer shared with the lexer and the AST builder
     * @throws RegexSyntaxException declared for callers; propagated from the lexer/builder setup
     */
    public PythonRegexParser(RegexLanguage language, RegexSource source, CompilationBuffer compilationBuffer) throws RegexSyntaxException {
        this.mode = PythonREMode.fromEncoding(source.getEncoding());
        this.lexer = new PythonRegexLexer(source, this.mode, compilationBuffer);
        this.astBuilder = new RegexASTBuilder(language, source, PythonRegexParser.createECMAScriptFlags(source), false, compilationBuffer);
    }

    /**
     * Builds the flags given to the AST builder: {@code dotAll} and {@code unicode} are always set,
     * and {@code sticky} is set when the Python method is {@code match} or {@code fullmatch}.
     */
    private static RegexFlags createECMAScriptFlags(RegexSource source) {
        boolean sticky = source.getOptions().getPythonMethod() == PythonMethod.match || source.getOptions().getPythonMethod() == PythonMethod.fullmatch;
        return RegexFlags.builder().dotAll(true).unicode(true).sticky(sticky).build();
    }

    /** Returns the flags in effect at the lexer's current position. */
    private PythonFlags getLocalFlags() {
        return lexer.getLocalFlags();
    }

    /** Returns the global flags as tracked by the lexer. */
    @Override
    public PythonFlags getFlags() {
        return lexer.getGlobalFlags();
    }

    /** Returns the named capture groups collected by the lexer, wrapped as a regex object. */
    @Override
    public AbstractRegexObject getNamedCaptureGroups() {
        return AbstractRegexObject.createNamedCaptureGroupMapInt(lexer.getNamedCaptureGroups());
    }

    /**
     * Parses the whole pattern and returns its AST.
     *
     * <p>When the Python method is {@code fullmatch}, the pattern is wrapped in an extra group which
     * is followed by a dollar assertion.
     *
     * @return the AST produced by the builder
     * @throws RegexSyntaxException on malformed input (misplaced quantifiers, unbalanced
     *             parentheses, invalid group references, misplaced global flags, ...)
     */
    @Override
    @CompilerDirectives.TruffleBoundary
    public RegexAST parse() throws RegexSyntaxException {
        boolean fullMatch = lexer.source.getOptions().getPythonMethod() == PythonMethod.fullmatch;
        astBuilder.pushRootGroup(true);
        if (fullMatch) {
            astBuilder.pushGroup();
        }
        // Conditional back-references may refer to groups that appear later in the pattern, so
        // their group numbers can only be range-checked once the whole pattern has been parsed.
        ArrayList<Token.BackReference> conditionalBackReferences = new ArrayList<>();
        Token token = null;
        while (lexer.hasNext()) {
            Token prev = token;
            Token.Kind prevKind = prev == null ? null : prev.kind;
            token = lexer.next();
            switch (token.kind) {
                case A:
                case Z:
                    astBuilder.addPositionAssertion(token);
                    break;
                case caret:
                    // A caret directly following another caret adds nothing.
                    if (prevKind != Token.Kind.caret) {
                        addCaretAssertion(token);
                    }
                    break;
                case dollar:
                    // A dollar directly following another dollar adds nothing.
                    if (prevKind != Token.Kind.dollar) {
                        addDollarAssertion();
                    }
                    break;
                case wordBoundary:
                    addWordBoundary(prevKind);
                    break;
                case nonWordBoundary:
                    addNonWordBoundary(prevKind);
                    break;
                case backReference: {
                    Token.BackReference backRefToken = (Token.BackReference) token;
                    verifyGroupReference(backRefToken);
                    astBuilder.addBackReference(backRefToken, getLocalFlags().isIgnoreCase());
                    break;
                }
                case quantifier:
                    addQuantifier(token, prevKind);
                    break;
                case alternation:
                    if (astBuilder.getCurGroup().isConditionalBackReferenceGroup() && astBuilder.getCurGroup().getAlternatives().size() == 2) {
                        throw syntaxError("conditional backref with more than two branches");
                    }
                    astBuilder.nextSequence();
                    break;
                case captureGroupBegin:
                    astBuilder.pushCaptureGroup(token);
                    break;
                case nonCaptureGroupBegin:
                    astBuilder.pushGroup(token);
                    break;
                case atomicGroupBegin:
                    astBuilder.pushAtomicGroup(token);
                    break;
                case lookAheadAssertionBegin:
                    astBuilder.pushLookAheadAssertion(token, ((Token.LookAheadAssertionBegin) token).isNegated());
                    break;
                case lookBehindAssertionBegin:
                    astBuilder.pushLookBehindAssertion(token, ((Token.LookBehindAssertionBegin) token).isNegated());
                    break;
                case groupEnd:
                    closeGroup(token);
                    break;
                case literalChar:
                    literalChar(((Token.LiteralCharacter) token).getCodePoint());
                    break;
                case charClass:
                    astBuilder.addCharClass((Token.CharacterClass) token);
                    break;
                case charClassBegin:
                    curCharClass.clear();
                    break;
                case charClassAtom:
                    curCharClass.addSet(((Token.CharacterClassAtom) token).getContents());
                    break;
                case charClassEnd:
                    finishCharClass();
                    break;
                case conditionalBackreference: {
                    Token.BackReference conditionalBackRefToken = (Token.BackReference) token;
                    verifyGroupReference(conditionalBackRefToken);
                    conditionalBackReferences.add(conditionalBackRefToken);
                    astBuilder.pushConditionalBackReferenceGroup(conditionalBackRefToken);
                    break;
                }
                case inlineFlags:
                    applyInlineFlags((Token.InlineFlags) token, prev);
                    break;
                default:
                    // Any other token kind requires no action here.
                    break;
            }
        }
        if (fullMatch) {
            astBuilder.popGroup();
            astBuilder.addDollar();
        }
        if (!astBuilder.curGroupIsRoot()) {
            throw syntaxErrorAtAbs("missing ), unterminated subpattern", astBuilder.getCurGroupStartPosition());
        }
        RegexAST ast = astBuilder.popRootGroup();
        for (Token.BackReference conditionalBackReference : conditionalBackReferences) {
            assert conditionalBackReference.getGroupNumbers().length == 1;
            int groupNumber = conditionalBackReference.getGroupNumbers()[0];
            if (groupNumber >= ast.getNumberOfCaptureGroups()) {
                // The +3 offset is taken over unchanged; presumably it skips the "(?(" prefix of
                // the conditional so the error points at the group reference - TODO confirm.
                throw syntaxErrorAtAbs(PyErrorMessages.invalidGroupReference(Integer.toString(groupNumber)), conditionalBackReference.getPosition() + 3);
            }
        }
        lexer.fixFlags();
        return ast;
    }

    /**
     * Emits the AST for a {@code ^} token. With the multi-line flag active this is a group with two
     * alternatives: the builder's caret, or a look-behind assertion on a line feed. Otherwise the
     * token is added as a plain position assertion.
     */
    private void addCaretAssertion(Token token) throws RegexSyntaxException {
        if (!getLocalFlags().isMultiLine()) {
            astBuilder.addPositionAssertion(token);
            return;
        }
        astBuilder.pushGroup();
        astBuilder.addCaret();
        astBuilder.nextSequence();
        astBuilder.pushLookBehindAssertion(false);
        astBuilder.addCharClass(CodePointSet.create(LINE_FEED));
        astBuilder.popGroup();
        astBuilder.popGroup();
    }

    /**
     * Emits the AST for a {@code $} token: a group with two alternatives, the builder's dollar or a
     * look-ahead assertion on a line feed. Unless the multi-line flag is active, that line feed must
     * itself be followed by a dollar.
     */
    private void addDollarAssertion() throws RegexSyntaxException {
        astBuilder.pushGroup();
        astBuilder.addDollar();
        astBuilder.nextSequence();
        astBuilder.pushLookAheadAssertion(false);
        astBuilder.addCharClass(CodePointSet.create(LINE_FEED));
        if (!getLocalFlags().isMultiLine()) {
            astBuilder.addDollar();
        }
        astBuilder.popGroup();
        astBuilder.popGroup();
    }

    /**
     * Emits a word-boundary assertion, choosing the word / non-word character sets according to the
     * unicode and locale flags.
     *
     * @param prevKind kind of the preceding token, or {@code null} at the start of the pattern
     */
    private void addWordBoundary(Token.Kind prevKind) throws RegexSyntaxException {
        if (prevKind == Token.Kind.wordBoundary) {
            // Repeating the same assertion adds nothing.
            return;
        }
        if (prevKind == Token.Kind.nonWordBoundary) {
            // Directly preceded by the opposite assertion: the current term is marked dead.
            astBuilder.replaceCurTermWithDeadNode();
            return;
        }
        if (getLocalFlags().isUnicode(mode)) {
            astBuilder.addWordBoundaryAssertion(lexer.getPredefinedCharClass('w'), lexer.getPredefinedCharClass('W'));
        } else if (getLocalFlags().isLocale()) {
            astBuilder.addWordBoundaryAssertion(lexer.getLocaleData().getWordCharacters(), lexer.getLocaleData().getNonWordCharacters());
        } else {
            astBuilder.addWordBoundaryAssertion(Constants.WORD_CHARS, Constants.NON_WORD_CHARS);
        }
    }

    /**
     * Emits a non-word-boundary assertion, choosing the word / non-word character sets according to
     * the unicode and locale flags.
     *
     * @param prevKind kind of the preceding token, or {@code null} at the start of the pattern
     */
    private void addNonWordBoundary(Token.Kind prevKind) throws RegexSyntaxException {
        if (prevKind == Token.Kind.nonWordBoundary) {
            // Repeating the same assertion adds nothing.
            return;
        }
        if (prevKind == Token.Kind.wordBoundary) {
            // Directly preceded by the opposite assertion: the current term is marked dead.
            astBuilder.replaceCurTermWithDeadNode();
            return;
        }
        if (getLocalFlags().isUnicode(mode)) {
            astBuilder.addWordNonBoundaryAssertionPython(lexer.getPredefinedCharClass('w'), lexer.getPredefinedCharClass('W'));
        } else if (getLocalFlags().isLocale()) {
            astBuilder.addWordNonBoundaryAssertionPython(lexer.getLocaleData().getWordCharacters(), lexer.getLocaleData().getNonWordCharacters());
        } else {
            astBuilder.addWordNonBoundaryAssertionPython(Constants.WORD_CHARS, Constants.NON_WORD_CHARS);
        }
    }

    /**
     * Attaches a quantifier to the current term.
     *
     * @param token the quantifier token
     * @param prevKind kind of the preceding token, or {@code null} at the start of the pattern
     * @throws RegexSyntaxException if the previous token was itself a quantifier, or if there is no
     *             current term / the previous token is not one that can be repeated
     */
    private void addQuantifier(Token token, Token.Kind prevKind) throws RegexSyntaxException {
        if (prevKind == Token.Kind.quantifier) {
            throw syntaxError("multiple repeat");
        }
        // EnumSet.contains(null) is simply false, so a quantifier as first token is rejected too.
        if (astBuilder.getCurTerm() == null || !QUANTIFIER_PREV.contains(prevKind)) {
            throw syntaxError("nothing to repeat");
        }
        astBuilder.addQuantifier((Token.Quantifier) token);
    }

    /**
     * Handles a closing parenthesis: restores the lexer's local flags if the group introduced any,
     * adds a second alternative to a conditional back-reference group that only has one, and pops
     * the group.
     *
     * @throws RegexSyntaxException if the current group's parent is the AST root node
     */
    private void closeGroup(Token token) throws RegexSyntaxException {
        if (astBuilder.getCurGroup().getParent() instanceof RegexASTRootNode) {
            throw syntaxError("unbalanced parenthesis");
        }
        if (astBuilder.getCurGroup().isLocalFlags()) {
            lexer.popLocalFlags();
        }
        if (astBuilder.getCurGroup().isConditionalBackReferenceGroup() && astBuilder.getCurGroup().getAlternatives().size() == 1) {
            astBuilder.nextSequence();
        }
        astBuilder.popGroup(token);
    }

    /**
     * Completes the bracketed character class accumulated in {@link #curCharClass} and adds it to
     * the AST, inverted if the lexer reports the class as inverted.
     */
    private void finishCharClass() throws RegexSyntaxException {
        // Must be evaluated before case folding, which may add further code points to the set.
        boolean wasSingleChar = !lexer.isCurCharClassInverted() && curCharClass.matchesSingleChar();
        if (lexer.featureEnabledIgnoreCase()) {
            lexer.caseFoldUnfold(curCharClass);
        }
        CodePointSet cps = curCharClass.toCodePointSet();
        astBuilder.addCharClass(lexer.isCurCharClassInverted() ? cps.createInverse(lexer.source.getEncoding()) : cps, wasSingleChar);
    }

    /**
     * Applies an inline-flags token. Global flags are merged into the lexer's global flags and are
     * only accepted at the very start of the pattern or directly after other global inline flags.
     * Non-global flags open a new group marked as carrying local flags and are pushed onto the
     * lexer's local flag stack.
     *
     * @param inlineFlags the inline-flags token
     * @param prev the preceding token, or {@code null} at the start of the pattern
     * @throws RegexSyntaxException if global flags appear anywhere but at the start
     */
    private void applyInlineFlags(Token.InlineFlags inlineFlags, Token prev) throws RegexSyntaxException {
        if (inlineFlags.isGlobal()) {
            boolean first = prev == null || (prev.kind == Token.Kind.inlineFlags && ((Token.InlineFlags) prev).isGlobal());
            if (!first) {
                throw syntaxErrorAtAbs("global flags not at the start of the expression", inlineFlags.getPosition());
            }
            lexer.addGlobalFlags((PythonFlags) inlineFlags.getFlags());
            return;
        }
        astBuilder.pushGroup(inlineFlags);
        astBuilder.getCurGroup().setLocalFlags(true);
        lexer.pushLocalFlags((PythonFlags) inlineFlags.getFlags());
    }

    /**
     * Adds a literal character to the AST. When case-insensitive matching is enabled the character
     * is first expanded via the lexer's case fold/unfold into a character class.
     */
    private void literalChar(int codePoint) {
        if (lexer.featureEnabledIgnoreCase()) {
            curCharClass.clear();
            curCharClass.addCodePoint(codePoint);
            lexer.caseFoldUnfold(curCharClass);
            astBuilder.addCharClass(curCharClass.toCodePointSet(), true);
        } else {
            astBuilder.addCharClass(CodePointSet.create(codePoint));
        }
    }

    /**
     * Validates a (conditional) back-reference against the groups that are currently open.
     *
     * <p>Conditional back-references outside of look-behind assertions are not checked here. Inside
     * a look-behind they must additionally refer to a group number the lexer has already seen.
     *
     * @param backRefToken the back-reference or conditional back-reference token; must carry
     *            exactly one group number
     * @throws RegexSyntaxException if the reference targets a group that is still open, or a group
     *             defined inside an enclosing look-behind assertion
     */
    private void verifyGroupReference(Token.BackReference backRefToken) throws RegexSyntaxException {
        boolean conditional = backRefToken.kind == Token.Kind.conditionalBackreference;
        assert backRefToken.getGroupNumbers().length == 1;
        int groupNumber = backRefToken.getGroupNumbers()[0];
        boolean insideLookBehind = insideLookBehind();
        if (conditional && insideLookBehind && groupNumber >= lexer.numberOfCaptureGroupsSoFar()) {
            throw syntaxErrorHere("cannot refer to an open group");
        }
        if (conditional && !insideLookBehind) {
            return;
        }
        // Reject references to any group that encloses the current position.
        for (RegexASTNode node = astBuilder.getCurGroup(); node != null; node = node.getParent()) {
            if (node instanceof Group && ((Group) node).getGroupNumber() == groupNumber) {
                // The +4 offset for named references is taken over unchanged; presumably it skips
                // the "(?P=" prefix - TODO confirm.
                int errorPosition = backRefToken.isNamedReference() ? backRefToken.getPosition() + 4 : backRefToken.getPosition();
                throw syntaxErrorAtAbs("cannot refer to an open group", errorPosition);
            }
        }
        if (astBuilder.getCurGroup() == null) {
            return;
        }
        // Reject references to groups whose number is at or above the lowest capture group number
        // enclosed by any surrounding look-behind assertion.
        for (RegexASTNode node = astBuilder.getCurGroup().getSubTreeParent(); node != null; node = node.getSubTreeParent()) {
            if (node instanceof LookBehindAssertion && ((LookBehindAssertion) node).getGroup().getEnclosedCaptureGroupsLow() <= groupNumber) {
                throw syntaxErrorHere("cannot refer to group defined in the same lookbehind subpattern");
            }
        }
    }

    /** Returns whether any enclosing subtree root of the current group is a look-behind assertion. */
    private boolean insideLookBehind() {
        for (RegexASTSubtreeRootNode subTreeParent = astBuilder.getCurGroup().getSubTreeParent(); subTreeParent != null; subTreeParent = subTreeParent.getSubTreeParent()) {
            if (subTreeParent.isLookBehindAssertion()) {
                return true;
            }
        }
        return false;
    }

    /** Creates a syntax error with the given message via the lexer. */
    private RegexSyntaxException syntaxError(String msg) {
        return lexer.syntaxError(msg);
    }

    /** Creates a syntax error via the lexer's {@code syntaxErrorHere}. */
    private RegexSyntaxException syntaxErrorHere(String msg) {
        return lexer.syntaxErrorHere(msg);
    }

    /** Creates a syntax error via the lexer's {@code syntaxErrorAtAbs} with position {@code i}. */
    private RegexSyntaxException syntaxErrorAtAbs(String msg, int i) {
        return lexer.syntaxErrorAtAbs(msg, i);
    }
}

