diff options
author | Dan Ungureanu <udan1107@gmail.com> | 2015-08-02 22:04:05 +0300 |
---|---|---|
committer | Dan Ungureanu <udan1107@gmail.com> | 2015-08-02 22:04:05 +0300 |
commit | 9130cca0d17169b4647d39b644cc6483b600623c (patch) | |
tree | aab84450559ba2651f31d92648ff9549f25eed5e /src | |
parent | e6a562e3e5bfbbbc03d469e50d65d063cac9ebd4 (diff) | |
download | sql-parser-9130cca0d17169b4647d39b644cc6483b600623c.zip sql-parser-9130cca0d17169b4647d39b644cc6483b600623c.tar.gz sql-parser-9130cca0d17169b4647d39b644cc6483b600623c.tar.bz2 |
Added utility to get first full statement from a buffer.
Misc coding style fixes.
Diffstat (limited to 'src')
-rw-r--r-- | src/Lexer.php | 92 |
-rw-r--r-- | src/Utils/Query.php | 63 |
2 files changed, 117 insertions, 38 deletions
diff --git a/src/Lexer.php b/src/Lexer.php index 697218c..ba4f6ef 100644 --- a/src/Lexer.php +++ b/src/Lexer.php @@ -63,37 +63,38 @@ namespace SqlParser { { /** - * A list of methods that are used in lexing the SQL query. - * - * @var array - */ + * A list of methods that are used in lexing the SQL query. + * + * @var array + */ public static $PARSER_METHODS = array( - // It is best to put the parsers in order of their complexity - // (ascending) and their occurrence rate (descending). - // - // Conflicts: - // - // 1. `parseDelimiter` and `parseUnknown`, `parseKeyword`, `parseNumber` - // They fight over delimiter. The delimiter may be a keyword, a number - // or almost any character which makes the delimiter one of the first - // tokens that must be parsed. - // - // 1. `parseNumber` and `parseOperator` - // They fight over `+` and `-`. - // - // 2. `parseComment` and `parseOperator` - // They fight over `/` (as in ```/*comment*/``` or ```a / b```) - // - // 3. `parseBool` and `parseKeyword` - // They fight over `TRUE` and `FALSE`. - // - // 4. `parseKeyword` and `parseUnknown` - // They fight over words. `parseUnknown` does not know about keywords. - - 'parseDelimiter', 'parseWhitespace', 'parseNumber', 'parseComment', - 'parseOperator', 'parseBool', 'parseString', 'parseSymbol', - 'parseKeyword', 'parseUnknown' + // It is best to put the parsers in order of their complexity + // (ascending) and their occurrence rate (descending). + // + // Conflicts: + // + // 1. `parseDelimiter`, `parseUnknown`, `parseKeyword`, `parseNumber` + // They fight over delimiter. The delimiter may be a keyword, a + // number or almost any character which makes the delimiter one of + // the first tokens that must be parsed. + // + // 1. `parseNumber` and `parseOperator` + // They fight over `+` and `-`. + // + // 2. `parseComment` and `parseOperator` + // They fight over `/` (as in ```/*comment*/``` or ```a / b```) + // + // 3. 
`parseBool` and `parseKeyword` + // They fight over `TRUE` and `FALSE`. + // + // 4. `parseKeyword` and `parseUnknown` + // They fight over words. `parseUnknown` does not know about + // keywords. + + 'parseDelimiter', 'parseWhitespace', 'parseNumber', + 'parseComment', 'parseOperator', 'parseBool', 'parseString', + 'parseSymbol', 'parseKeyword', 'parseUnknown' ); /** @@ -149,7 +150,7 @@ namespace SqlParser { * * @var string */ - public $delimiter = ';'; + public $delimiter; /** * The length of the delimiter. @@ -160,7 +161,7 @@ namespace SqlParser { * * @var int */ - public $delimiterLen = 1; + public $delimiterLen; /** * List of errors that occurred during lexing. @@ -178,10 +179,12 @@ namespace SqlParser { /** * Constructor. * - * @param string|UtfString $str The query to be lexed. - * @param bool $strict Whether strict mode should be enabled or not. + * @param string|UtfString $str The query to be lexed. + * @param bool $strict Whether strict mode should be + * enabled or not. + * @param string $delimiter The delimiter to be used. */ - public function __construct($str, $strict = false) + public function __construct($str, $strict = false, $delimiter = null) { // `strlen` is used instead of `mb_strlen` because the lexer needs to // parse each byte of the input. @@ -201,12 +204,25 @@ namespace SqlParser { $this->strict = $strict; // Setting the delimiter. - $this->delimiter = static::$DEFAULT_DELIMITER; + $this->setDelimiter( + !empty($delimiter) ? $delimiter : static::$DEFAULT_DELIMITER + ); $this->lex(); } /** + * Sets the delimiter. + * + * @param string $delimiter The new delimiter. + */ + public function setDelimiter($delimiter) + { + $this->delimiter = $delimiter; + $this->delimiterLen = strlen($delimiter); + } + + /** * Parses the string and extracts lexemes. 
* * @return void @@ -521,9 +537,9 @@ namespace SqlParser { while ((++$this->last < $this->len) && ($this->str[$this->last] !== "\n")) { $token .= $this->str[$this->last]; } - if ($this->last < $this->len) { - $token .= $this->str[$this->last]; - } + + // Adding the line ending. + $token .= "\n"; } return new Token($token, Token::TYPE_COMMENT, Token::FLAG_COMMENT_SQL); } diff --git a/src/Utils/Query.php b/src/Utils/Query.php index c11e858..d8e9500 100644 --- a/src/Utils/Query.php +++ b/src/Utils/Query.php @@ -587,6 +587,10 @@ class Query for ($i = $statement->first; $i <= $statement->last; ++$i) { $token = $list->tokens[$i]; + if ($token->type === Token::TYPE_COMMENT) { + continue; + } + if ($token->type === Token::TYPE_OPERATOR) { if ($token->value === '(') { ++$brackets; @@ -708,4 +712,63 @@ class Query return $ret; } + + /** + * Gets the first full statement in the query. + * + * @param string $query The query to be analyzed. + * @param string $delimiter The delimiter to be used. + * + * @return array Array containing the first full query, the + * remaining part of the query and the last + * delimiter. + */ + public static function getFirstStatement($query, $delimiter = null) + { + $lexer = new Lexer($query, false, $delimiter); + $list = $lexer->list; + + /** + * Whether a full statement was found. + * @var bool + */ + $fullStatement = false; + + /** + * The first full statement. + * @var string + */ + $statement = ''; + + for ($list->idx = 0; $list->idx < $list->count; ++$list->idx) { + $token = $list->tokens[$list->idx]; + + if ($token->type === Token::TYPE_COMMENT) { + continue; + } + + $statement .= $token->token; + + if (($token->type === Token::TYPE_DELIMITER) && (!empty($token->value))) { + $delimiter = $token->value; + $fullStatement = true; + break; + } + } + + // No statement was found so we return the entire query as being the + // remaining part. 
+ if (!$fullStatement) { + return array(null, $query, $delimiter); + } + + // At least one query was found so we have to build the rest of the + // remaining query. + $query = ''; + for (++$list->idx; $list->idx < $list->count; ++$list->idx) { + $query .= $list->tokens[$list->idx]->value; + } + + return array(trim($statement), $query, $delimiter); + } } |