<?php
// Parser.php

namespace NF_FU_VENDOR\JmesPath;

use NF_FU_VENDOR\JmesPath\Lexer as T;
/**
 * JMESPath Pratt parser
 *
 * Converts the token list produced by the Lexer into an array based AST using
 * top-down operator precedence parsing. Each token type may have a "nud"
 * handler (the token starts an expression) and/or a "led" handler (the token
 * follows an already parsed left-hand expression). Handlers are looked up by
 * name as `nud_<type>` / `led_<type>`; a missing handler ends up in __call(),
 * which raises a syntax error.
 *
 * @link http://hall.org.ua/halls/wizzard/pdf/Vaughan.Pratt.TDOP.pdf
 */
class Parser
{
    /** @var Lexer */
    private $lexer;
    /** @var array Tokens of the expression currently being parsed */
    private $tokens;
    /** @var array Token the parser is currently positioned on */
    private $token;
    /** @var int Index of the current token inside $tokens */
    private $tpos;
    /** @var string Expression currently being parsed (used for error reporting) */
    private $expression;
    /** @var array Token substituted when the token list is exhausted */
    private static $nullToken = ['type' => T::T_EOF];
    /** @var array AST node representing the current node */
    private static $currentNode = ['type' => T::T_CURRENT];
    /**
     * @var array Binding power of each token type (higher binds tighter).
     *            Token types that are not listed are treated as 0, see
     *            bindingPower().
     */
    private static $bp = [
        T::T_EOF => 0,
        T::T_QUOTED_IDENTIFIER => 0,
        T::T_IDENTIFIER => 0,
        T::T_RBRACKET => 0,
        T::T_RPAREN => 0,
        T::T_COMMA => 0,
        T::T_RBRACE => 0,
        T::T_NUMBER => 0,
        T::T_CURRENT => 0,
        T::T_EXPREF => 0,
        T::T_COLON => 0,
        T::T_PIPE => 1,
        T::T_OR => 2,
        T::T_AND => 3,
        T::T_COMPARATOR => 5,
        T::T_FLATTEN => 9,
        T::T_STAR => 20,
        T::T_FILTER => 21,
        T::T_DOT => 40,
        T::T_NOT => 45,
        T::T_LBRACE => 50,
        T::T_LBRACKET => 55,
        T::T_LPAREN => 60,
    ];
    /** @var array Acceptable tokens after a dot token */
    private static $afterDot = [
        // foo.bar
        T::T_IDENTIFIER => \true,
        // foo."bar"
        T::T_QUOTED_IDENTIFIER => \true,
        // foo.*
        T::T_STAR => \true,
        // foo.{a: 0}
        T::T_LBRACE => \true,
        // foo.[a, b]
        T::T_LBRACKET => \true,
        // foo.[?bar]
        T::T_FILTER => \true,
    ];
    /**
     * @param Lexer $lexer Lexer used to tokenize expressions. A new Lexer is
     *                     created when none is given.
     */
    public function __construct(Lexer $lexer = null)
    {
        $this->lexer = $lexer ?: new Lexer();
    }
    /**
     * Parses a JMESPath expression into an AST
     *
     * @param string $expression JMESPath expression to compile
     *
     * @return array Returns an array based AST
     * @throws SyntaxErrorException
     */
    public function parse($expression)
    {
        $this->expression = $expression;
        $this->tokens = $this->lexer->tokenize($expression);
        // Start before the first token so that next() lands on index 0.
        $this->tpos = -1;
        $this->next();
        $result = $this->expr();
        if ($this->token['type'] === T::T_EOF) {
            return $result;
        }
        throw $this->syntax('Did not reach the end of the token stream');
    }
    /**
     * Returns the binding power of a token type.
     *
     * The $bp table does not contain every token type the parser can be
     * positioned on (for example the type handled by nud_literal). Reading
     * such a key directly raises an "undefined index/array key" diagnostic,
     * so unknown types are explicitly given the lowest binding power.
     *
     * @param string $type Token type
     *
     * @return int
     */
    private static function bindingPower($type)
    {
        return isset(self::$bp[$type]) ? self::$bp[$type] : 0;
    }
    /**
     * Parses an expression while rbp < lbp.
     *
     * @param int   $rbp  Right bound precedence
     *
     * @return array
     */
    private function expr($rbp = 0)
    {
        $left = $this->{"nud_{$this->token['type']}"}();
        // Keep folding the left side into led handlers for as long as the
        // upcoming token binds tighter than the caller's precedence.
        while ($rbp < self::bindingPower($this->token['type'])) {
            $left = $this->{"led_{$this->token['type']}"}($left);
        }
        return $left;
    }
    /** Parses an unquoted identifier into a field node. */
    private function nud_identifier()
    {
        $token = $this->token;
        $this->next();
        return ['type' => 'field', 'value' => $token['value']];
    }
    /**
     * Parses a quoted identifier into a field node. A quoted identifier may
     * not be followed by "(", i.e. it cannot be used as a function name.
     */
    private function nud_quoted_identifier()
    {
        $token = $this->token;
        $this->next();
        $this->assertNotToken(T::T_LPAREN);
        return ['type' => 'field', 'value' => $token['value']];
    }
    /** Parses the current-node token. */
    private function nud_current()
    {
        $this->next();
        return self::$currentNode;
    }
    /** Parses a literal token into a literal node. */
    private function nud_literal()
    {
        $token = $this->token;
        $this->next();
        return ['type' => 'literal', 'value' => $token['value']];
    }
    /** Parses an expression reference and the expression it wraps. */
    private function nud_expref()
    {
        $this->next();
        return ['type' => T::T_EXPREF, 'children' => [$this->expr(self::$bp[T::T_EXPREF])]];
    }
    /** Parses a prefix "not" and its operand. */
    private function nud_not()
    {
        $this->next();
        return ['type' => T::T_NOT, 'children' => [$this->expr(self::$bp[T::T_NOT])]];
    }
    /**
     * Parses a parenthesized expression; the parentheses themselves leave no
     * node in the AST.
     *
     * @throws SyntaxErrorException when the closing ")" is missing
     */
    private function nud_lparen()
    {
        $this->next();
        $result = $this->expr(0);
        if ($this->token['type'] !== T::T_RPAREN) {
            throw $this->syntax('Unclosed `(`');
        }
        $this->next();
        return $result;
    }
    /** Parses a multi-select hash, e.g. {a: foo, "b": bar}. */
    private function nud_lbrace()
    {
        static $validKeys = [T::T_QUOTED_IDENTIFIER => \true, T::T_IDENTIFIER => \true];
        $this->next($validKeys);
        $pairs = [];
        do {
            $pairs[] = $this->parseKeyValuePair();
            if ($this->token['type'] === T::T_COMMA) {
                // A comma must be followed by another key.
                $this->next($validKeys);
            }
        } while ($this->token['type'] !== T::T_RBRACE);
        $this->next();
        return ['type' => 'multi_select_hash', 'children' => $pairs];
    }
    /** A leading flatten token applies to the current node. */
    private function nud_flatten()
    {
        return $this->led_flatten(self::$currentNode);
    }
    /** A leading filter token applies to the current node. */
    private function nud_filter()
    {
        return $this->led_filter(self::$currentNode);
    }
    /** A leading "*" is an object wildcard on the current node. */
    private function nud_star()
    {
        return $this->parseWildcardObject(self::$currentNode);
    }
    /**
     * Parses a leading "[": an index/slice ([0], [1:2]), an array wildcard
     * ([*]) or otherwise a multi-select list ([a, b]).
     */
    private function nud_lbracket()
    {
        $this->next();
        $type = $this->token['type'];
        if ($type === T::T_NUMBER || $type === T::T_COLON) {
            return $this->parseArrayIndexExpression();
        }
        if ($type === T::T_STAR && $this->lookahead() === T::T_RBRACKET) {
            return $this->parseWildcardArray();
        }
        return $this->parseMultiSelectList();
    }
    /**
     * Parses "[" following an expression: an index/slice or an array
     * wildcard applied to $left.
     */
    private function led_lbracket(array $left)
    {
        static $nextTypes = [T::T_NUMBER => \true, T::T_COLON => \true, T::T_STAR => \true];
        $this->next($nextTypes);
        switch ($this->token['type']) {
            case T::T_NUMBER:
            case T::T_COLON:
                return ['type' => 'subexpression', 'children' => [$left, $this->parseArrayIndexExpression()]];
            default:
                // Only T_STAR is left, next() rejected everything else.
                return $this->parseWildcardArray($left);
        }
    }
    /** Parses a flatten token into an array projection over flatten($left). */
    private function led_flatten(array $left)
    {
        $this->next();
        return [
            'type' => 'projection',
            'from' => 'array',
            'children' => [
                ['type' => T::T_FLATTEN, 'children' => [$left]],
                $this->parseProjection(self::$bp[T::T_FLATTEN]),
            ],
        ];
    }
    /** Parses "." following an expression (sub-expression or object wildcard). */
    private function led_dot(array $left)
    {
        $this->next(self::$afterDot);
        if ($this->token['type'] === T::T_STAR) {
            return $this->parseWildcardObject($left);
        }
        return ['type' => 'subexpression', 'children' => [$left, $this->parseDot(self::$bp[T::T_DOT])]];
    }
    /** Parses the right-hand side of an "or" expression. */
    private function led_or(array $left)
    {
        $this->next();
        return ['type' => T::T_OR, 'children' => [$left, $this->expr(self::$bp[T::T_OR])]];
    }
    /** Parses the right-hand side of an "and" expression. */
    private function led_and(array $left)
    {
        $this->next();
        return ['type' => T::T_AND, 'children' => [$left, $this->expr(self::$bp[T::T_AND])]];
    }
    /** Parses the right-hand side of a pipe expression. */
    private function led_pipe(array $left)
    {
        $this->next();
        return ['type' => T::T_PIPE, 'children' => [$left, $this->expr(self::$bp[T::T_PIPE])]];
    }
    /**
     * Parses a function call; $left supplies the function name.
     *
     * @throws SyntaxErrorException when $left is not a field node and thus
     *                              cannot provide a function name
     */
    private function led_lparen(array $left)
    {
        // Only a field node carries a usable name. Anything else (current
        // node, index, literal, projection, ...) would yield a function node
        // with a missing or meaningless name.
        if ($left['type'] !== 'field') {
            throw $this->syntax('Only an identifier can be called as a function');
        }
        $args = [];
        $this->next();
        while ($this->token['type'] !== T::T_RPAREN) {
            $args[] = $this->expr(0);
            if ($this->token['type'] === T::T_COMMA) {
                $this->next();
            }
        }
        $this->next();
        return ['type' => 'function', 'value' => $left['value'], 'children' => $args];
    }
    /**
     * Parses a filter expression into an array projection over $left whose
     * right side is a condition node.
     *
     * @throws SyntaxErrorException when the closing "]" is missing
     */
    private function led_filter(array $left)
    {
        $this->next();
        $expression = $this->expr();
        if ($this->token['type'] !== T::T_RBRACKET) {
            throw $this->syntax('Expected a closing rbracket for the filter');
        }
        $this->next();
        $rhs = $this->parseProjection(self::$bp[T::T_FILTER]);
        return [
            'type' => 'projection',
            'from' => 'array',
            'children' => [
                $left ?: self::$currentNode,
                ['type' => 'condition', 'children' => [$expression, $rhs]],
            ],
        ];
    }
    /** Parses the right-hand side of a comparison. */
    private function led_comparator(array $left)
    {
        $token = $this->token;
        $this->next();
        return [
            'type' => T::T_COMPARATOR,
            'value' => $token['value'],
            'children' => [$left, $this->expr(self::$bp[T::T_COMPARATOR])],
        ];
    }
    /**
     * Parses the right-hand side of a projection.
     *
     * @param int $bp Binding power used when parsing the right-hand side
     *
     * @return array
     * @throws SyntaxErrorException when the projection is followed by a token
     *                              that cannot continue it
     */
    private function parseProjection($bp)
    {
        $type = $this->token['type'];
        if (self::bindingPower($type) < 10) {
            // A weakly binding token ends the projection; it then projects
            // onto the current node.
            return self::$currentNode;
        }
        if ($type === T::T_DOT) {
            $this->next(self::$afterDot);
            return $this->parseDot($bp);
        }
        if ($type === T::T_LBRACKET || $type === T::T_FILTER) {
            return $this->expr($bp);
        }
        throw $this->syntax('Syntax error after projection');
    }
    /**
     * Parses what follows a dot. "[" after a dot always starts a multi-select
     * list; every other token is parsed as a regular expression.
     *
     * @param int $bp Binding power used when parsing the expression
     */
    private function parseDot($bp)
    {
        if ($this->token['type'] === T::T_LBRACKET) {
            $this->next();
            return $this->parseMultiSelectList();
        }
        return $this->expr($bp);
    }
    /** Parses one "key: expression" entry of a multi-select hash. */
    private function parseKeyValuePair()
    {
        static $validColon = [T::T_COLON => \true];
        $key = $this->token['value'];
        // Move onto the colon (mandatory), then onto the first value token.
        $this->next($validColon);
        $this->next();
        return ['type' => 'key_val_pair', 'value' => $key, 'children' => [$this->expr()]];
    }
    /** Parses "*" into an object projection over $left (or the current node). */
    private function parseWildcardObject(array $left = null)
    {
        $this->next();
        return [
            'type' => 'projection',
            'from' => 'object',
            'children' => [$left ?: self::$currentNode, $this->parseProjection(self::$bp[T::T_STAR])],
        ];
    }
    /** Parses "[*]" into an array projection over $left (or the current node). */
    private function parseWildcardArray(array $left = null)
    {
        static $getRbracket = [T::T_RBRACKET => \true];
        // Currently on "*": require "]" next, then step past it.
        $this->next($getRbracket);
        $this->next();
        return [
            'type' => 'projection',
            'from' => 'array',
            'children' => [$left ?: self::$currentNode, $this->parseProjection(self::$bp[T::T_STAR])],
        ];
    }
    /**
     * Parses an array index expression (e.g., [0], [1:2:3])
     *
     * Expects to be positioned on the first token after "[".
     *
     * @return array An index node, or an array projection over a slice node
     * @throws SyntaxErrorException
     */
    private function parseArrayIndexExpression()
    {
        static $matchNext = [T::T_NUMBER => \true, T::T_COLON => \true, T::T_RBRACKET => \true];
        $pos = 0;
        $parts = [null, null, null];
        $expected = $matchNext;
        do {
            if ($this->token['type'] === T::T_COLON) {
                // Every colon moves on to the next slice part.
                $pos++;
                $expected = $matchNext;
            } elseif ($this->token['type'] === T::T_NUMBER) {
                $parts[$pos] = $this->token['value'];
                // Two numbers may not directly follow each other.
                $expected = [T::T_COLON => \true, T::T_RBRACKET => \true];
            }
            $this->next($expected);
        } while ($this->token['type'] !== T::T_RBRACKET);
        // Consume the closing bracket
        $this->next();
        if ($pos === 0) {
            // No colons were found so this is a simple index extraction
            return ['type' => 'index', 'value' => $parts[0]];
        }
        if ($pos > 2) {
            throw $this->syntax('Invalid array slice syntax: too many colons');
        }
        // At least one colon was found so this is a slice (e.g., [2:], [::2])
        return [
            'type' => 'projection',
            'from' => 'array',
            'children' => [
                ['type' => 'slice', 'value' => $parts],
                $this->parseProjection(self::$bp[T::T_STAR]),
            ],
        ];
    }
    /**
     * Parses a multi-select list (e.g., [a, b]). Expects to be positioned on
     * the first token after "[".
     */
    private function parseMultiSelectList()
    {
        $nodes = [];
        do {
            $nodes[] = $this->expr();
            if ($this->token['type'] === T::T_COMMA) {
                $this->next();
                // Reject a trailing comma such as [a, ]
                $this->assertNotToken(T::T_RBRACKET);
            }
        } while ($this->token['type'] !== T::T_RBRACKET);
        $this->next();
        return ['type' => 'multi_select_list', 'children' => $nodes];
    }
    /**
     * Creates (but does not throw) a syntax error for the current token.
     *
     * @param string|array $msg Error message. next() passes its map of
     *                          expected token types here instead of a string;
     *                          NOTE(review): confirm SyntaxErrorException
     *                          accepts an array for this argument.
     *
     * @return SyntaxErrorException
     */
    private function syntax($msg)
    {
        return new SyntaxErrorException($msg, $this->token, $this->expression);
    }
    /** Returns the type of the token after the current one (T_EOF if none). */
    private function lookahead()
    {
        return !isset($this->tokens[$this->tpos + 1]) ? T::T_EOF : $this->tokens[$this->tpos + 1]['type'];
    }
    /**
     * Advances to the next token, or to the EOF placeholder when the token
     * list is exhausted.
     *
     * @param array $match Optional map of acceptable token types (type => true)
     *
     * @throws SyntaxErrorException when $match is given and the new token's
     *                              type is not one of its keys
     */
    private function next(array $match = null)
    {
        if (!isset($this->tokens[$this->tpos + 1])) {
            $this->token = self::$nullToken;
        } else {
            $this->token = $this->tokens[++$this->tpos];
        }
        if ($match && !isset($match[$this->token['type']])) {
            throw $this->syntax($match);
        }
    }
    /**
     * Fails when the current token has the given type.
     *
     * @param string $type Token type that is not allowed here
     *
     * @throws SyntaxErrorException
     */
    private function assertNotToken($type)
    {
        if ($this->token['type'] === $type) {
            throw $this->syntax("Token {$this->tpos} not allowed to be {$type}");
        }
    }
    /**
     * @internal Handles undefined tokens without paying the cost of validation
     *
     * Reached when expr() dispatches to a nud_/led_ handler that does not
     * exist; reports the offending token together with the token types that
     * do have a handler of that kind.
     *
     * @throws SyntaxErrorException    for missing nud_/led_ handlers
     * @throws \BadMethodCallException for any other undefined method
     */
    public function __call($method, $args)
    {
        $prefix = \substr($method, 0, 4);
        if ($prefix == 'nud_' || $prefix == 'led_') {
            $token = \substr($method, 4);
            $message = "Unexpected \"{$token}\" token ({$method}). Expected one of" . " the following tokens: " . \implode(', ', \array_map(function ($i) {
                return '"' . \substr($i, 4) . '"';
            }, \array_filter(\get_class_methods($this), function ($i) use($prefix) {
                return \strpos($i, $prefix) === 0;
            })));
            throw $this->syntax($message);
        }
        throw new \BadMethodCallException("Call to undefined method {$method}");
    }
}