diff options
author | Dan Ungureanu <udan1107@gmail.com> | 2015-08-09 22:14:00 +0300 |
---|---|---|
committer | Dan Ungureanu <udan1107@gmail.com> | 2015-08-09 22:14:00 +0300 |
commit | 0ccc3a271390b8491765960cc92575d7b96e05f1 (patch) | |
tree | 8bb7b4ee31187bfad3fd2cfecdc87c29f3db3fe8 | |
parent | 77d5409b57f0ac82cf2ae71f339912c7f175c5cc (diff) | |
download | sql-parser-0ccc3a271390b8491765960cc92575d7b96e05f1.zip sql-parser-0ccc3a271390b8491765960cc92575d7b96e05f1.tar.gz sql-parser-0ccc3a271390b8491765960cc92575d7b96e05f1.tar.bz2 |
Introduced the buffered query parser. It extracts individual statements from a
buffered string so that each one can then be parsed using the parser.
DELIMITER is no longer reported as unexpected beginning of statement.
Fixed typos.
-rw-r--r-- | src/Components/ExpressionArray.php | 4 | ||||
-rw-r--r-- | src/Parser.php | 10 | ||||
-rw-r--r-- | src/Utils/BufferedQuery.php | 390 | ||||
-rw-r--r-- | src/Utils/Query.php | 2 | ||||
-rw-r--r-- | src/Utils/Table.php | 2 |
5 files changed, 404 insertions, 4 deletions
diff --git a/src/Components/ExpressionArray.php b/src/Components/ExpressionArray.php index f453154..309972a 100644 --- a/src/Components/ExpressionArray.php +++ b/src/Components/ExpressionArray.php @@ -1,7 +1,7 @@ <?php /** - * Parses a a list of expression delimited by a comma. + * Parses a a list of expressions delimited by a comma. * * @package SqlParser * @subpackage Components @@ -14,7 +14,7 @@ use SqlParser\Token; use SqlParser\TokensList; /** - * Parses a a list of expression delimited by a comma. + * Parses a a list of expressions delimited by a comma. * * @category Keywords * @package SqlParser diff --git a/src/Parser.php b/src/Parser.php index b44ecc6..ac92aaf 100644 --- a/src/Parser.php +++ b/src/Parser.php @@ -368,6 +368,16 @@ namespace SqlParser { */ $token = $list->tokens[$list->idx]; + // `DELIMITER` is not an actual statement and it requires + // special handling. + if (($token->type === Token::TYPE_NONE) + && (strtoupper($token->token) === 'DELIMITER') + ) { + // Skipping to the end of this statement. + $list->getNextOfType(Token::TYPE_DELIMITER); + continue; + } + // Statements can start with keywords only. // Comments, whitespaces, etc. are ignored. if ($token->type !== Token::TYPE_KEYWORD) { diff --git a/src/Utils/BufferedQuery.php b/src/Utils/BufferedQuery.php new file mode 100644 index 0000000..dc2bdcb --- /dev/null +++ b/src/Utils/BufferedQuery.php @@ -0,0 +1,390 @@ +<?php + +/** + * Buffered query utilities. + * + * @package SqlParser + * @subpackage Utils + */ +namespace SqlParser\Utils; + +use SqlParser\Context; + +/** + * Buffer query utilities. + * + * Implements a specialized lexer used to extract statements from large inputs + * that are being buffered. After each statement has been extracted, a lexer or + * a parser may be used. + * + * All comments are skipped, with one exception: MySQL commands inside `/*!`. 
+ *
+ * @category Lexer
+ * @package SqlParser
+ * @subpackage Utils
+ * @author Dan Ungureanu <udan1107@gmail.com>
+ * @license http://opensource.org/licenses/GPL-2.0 GNU Public License
+ */
+class BufferedQuery
+{
+
+    // Constants that describe the current status of the parser.
+    const STATUS_STRING_SINGLE_QUOTES = 1;
+    const STATUS_STRING_DOUBLE_QUOTES = 2;
+    const STATUS_STRING_BACKTICK = 3;
+    const STATUS_COMMENT_BASH = 4;
+    const STATUS_COMMENT_C = 5;
+    const STATUS_COMMENT_SQL = 6;
+
+    /**
+     * The query that is being processed.
+     *
+     * This field can be modified just by appending to it!
+     *
+     * @var string
+     */
+    public $query = '';
+
+    /**
+     * The options of this parser.
+     *
+     * @var array
+     */
+    public $options = array();
+
+    /**
+     * The last delimiter used.
+     *
+     * @var string
+     */
+    public $delimiter;
+
+    /**
+     * The length of the delimiter.
+     *
+     * @var int
+     */
+    public $delimiterLen;
+
+    /**
+     * The current status of the parser.
+     *
+     * One of the `STATUS_*` constants, or `0` when the parser is not inside
+     * a string or a comment.
+     *
+     * @var int
+     */
+    public $status;
+
+    /**
+     * The last incomplete query that was extracted.
+     *
+     * @var string
+     */
+    public $current = '';
+
+    /**
+     * The index in `$query` where the next call to `extract()` resumes.
+     *
+     * It is kept per instance so that several buffers can be processed at
+     * the same time without overwriting each other's position.
+     *
+     * @var int
+     */
+    protected $position = 0;
+
+    /**
+     * Constructor.
+     *
+     * @param string $query   The query to be parsed.
+     * @param array  $options The options of this parser.
+     */
+    public function __construct($query = '', array $options = array())
+    {
+        // Merges specified options with defaults.
+        $this->options = array_merge(
+            array(
+                /**
+                 * The starting delimiter.
+                 * @var string
+                 */
+                'delimiter' => ';',
+
+                /**
+                 * Whether `DELIMITER` statements should be parsed.
+                 * @var bool
+                 */
+                'parse_delimiter' => false,
+
+                /**
+                 * Whether a delimiter should be added at the end of the
+                 * statement.
+                 * @var bool
+                 */
+                'add_delimiter' => false,
+            ),
+            $options
+        );
+
+        // The initial buffer is the query given by the caller (it used to be
+        // discarded and replaced by an empty string).
+        $this->query = $query;
+        $this->setDelimiter($this->options['delimiter']);
+    }
+
+    /**
+     * Sets the delimiter.
+     *
+     * Used to update the length of it too.
+     *
+     * @param string $delimiter The new delimiter.
+     *
+     * @return void
+     */
+    public function setDelimiter($delimiter)
+    {
+        $this->delimiter = $delimiter;
+        $this->delimiterLen = strlen($delimiter);
+    }
+
+    /**
+     * Extracts a statement from the buffer.
+     *
+     * Comments are dropped (except MySQL commands inside `/*!`), strings and
+     * backtick-quoted identifiers are copied verbatim and a statement ends at
+     * the current delimiter.
+     *
+     * @param bool $end Whether the end of the buffer was reached.
+     *
+     * @return string|false The extracted statement (an empty string for a
+     *                      `DELIMITER` statement when `parse_delimiter` is
+     *                      disabled) or `false` if no complete statement is
+     *                      available yet.
+     */
+    public function extract($end = false)
+    {
+        /**
+         * The last parsed position.
+         *
+         * Bound by reference to the instance property: the position must
+         * survive between calls (more data may be appended in between), but
+         * it must not be shared with other instances, which is what a
+         * `static` local would do.
+         *
+         * @var int $i
+         */
+        $i = &$this->position;
+
+        if (empty($this->query)) {
+            return false;
+        }
+
+        /**
+         * The length of the buffer.
+         * @var int $len
+         */
+        $len = strlen($this->query);
+
+        /**
+         * The last index of the string that is going to be parsed.
+         *
+         * There must be a few characters left in the buffer so the parser can
+         * avoid confusing some symbols that may have multiple meanings.
+         *
+         * For example, if the buffer ends in `-` that may be an operator or the
+         * beginning of a comment.
+         *
+         * Another example is when the buffer ends in `DELIMITE`. The parser is
+         * going to require a few more characters because that may be a part of
+         * the `DELIMITER` keyword or just a column named `DELIMITE`.
+         *
+         * Those extra characters are required only if there is more data
+         * expected (the end of the buffer was not reached).
+         *
+         * @var int $loopLen
+         */
+        $loopLen = $end ? $len : $len - 16;
+
+        for (; $i < $loopLen; ++$i) {
+
+            /*
+             * Handling special parser statuses.
+             */
+            if ($this->status === static::STATUS_STRING_SINGLE_QUOTES) {
+                // Single-quoted strings like 'foo'.
+                if (($this->query[$i] === '\\') && ($i + 1 < $len)) {
+                    // A backslash escapes the next character, whatever it is.
+                    // Consuming the pair at once handles both `\'` and an
+                    // escaped backslash right before the closing quote.
+                    $this->current .= $this->query[$i++];
+                } elseif ($this->query[$i] === '\'') {
+                    $this->status = 0;
+                }
+                $this->current .= $this->query[$i];
+                continue;
+            } elseif ($this->status === static::STATUS_STRING_DOUBLE_QUOTES) {
+                // Double-quoted strings like "bar".
+                if (($this->query[$i] === '\\') && ($i + 1 < $len)) {
+                    // Same escape handling as for single-quoted strings.
+                    $this->current .= $this->query[$i++];
+                } elseif ($this->query[$i] === '"') {
+                    $this->status = 0;
+                }
+                $this->current .= $this->query[$i];
+                continue;
+            } elseif ($this->status === static::STATUS_STRING_BACKTICK) {
+                // Backtick-quoted identifiers have no backslash escapes.
+                if ($this->query[$i] === '`') {
+                    $this->status = 0;
+                }
+                $this->current .= $this->query[$i];
+                continue;
+            } elseif (($this->status === static::STATUS_COMMENT_BASH)
+                || ($this->status === static::STATUS_COMMENT_SQL)
+            ) {
+                // Bash-like (#) or SQL-like (-- ) comments end in new line.
+                if ($this->query[$i] === "\n") {
+                    $this->status = 0;
+                }
+                continue;
+            } elseif ($this->status === static::STATUS_COMMENT_C) {
+                // C-like comments end in */.
+                if (($this->query[$i - 1] === '*') && ($this->query[$i] === '/')) {
+                    $this->status = 0;
+                }
+                continue;
+            }
+
+            /*
+             * Checking if a string started.
+             */
+            if ($this->query[$i] === '\'') {
+                $this->status = static::STATUS_STRING_SINGLE_QUOTES;
+                $this->current .= $this->query[$i];
+                continue;
+            } elseif ($this->query[$i] === '"') {
+                $this->status = static::STATUS_STRING_DOUBLE_QUOTES;
+                $this->current .= $this->query[$i];
+                continue;
+            } elseif ($this->query[$i] === '`') {
+                $this->status = static::STATUS_STRING_BACKTICK;
+                $this->current .= $this->query[$i];
+                continue;
+            }
+
+            /*
+             * Checking if a comment started.
+             */
+            if ($this->query[$i] === '#') {
+                $this->status = static::STATUS_COMMENT_BASH;
+                continue;
+            } elseif (($i + 2 < $len)
+                && ($this->query[$i] === '-')
+                && ($this->query[$i + 1] === '-')
+                && (Context::isWhitespace($this->query[$i + 2]))
+            ) {
+                $this->status = static::STATUS_COMMENT_SQL;
+                continue;
+            } elseif (($i + 2 < $len)
+                && ($this->query[$i] === '/')
+                && ($this->query[$i + 1] === '*')
+                && ($this->query[$i + 2] !== '!')
+            ) {
+                $this->status = static::STATUS_COMMENT_C;
+                continue;
+            }
+
+            /*
+             * Handling `DELIMITER` statement.
+             *
+             * The code below basically checks for
+             * `strtoupper(substr($this->query, $i, 9)) === 'DELIMITER'`
+             *
+             * This optimization makes the code about 3 times faster.
+             *
+             * The keyword is accepted only as a whole word: it must be
+             * followed by a whitespace and must be at the beginning of the
+             * buffer or preceded by a whitespace. Otherwise identifiers such
+             * as `col_delimiter` or `delimiters` would be mistaken for it.
+             * The bounds check also keeps the look-ahead inside the buffer
+             * when its end was reached.
+             */
+            if ((($this->query[$i] === 'D') || ($this->query[$i] === 'd'))
+                && ($i + 9 < $len)
+                && (($this->query[$i + 1] === 'E') || ($this->query[$i + 1] === 'e'))
+                && (($this->query[$i + 2] === 'L') || ($this->query[$i + 2] === 'l'))
+                && (($this->query[$i + 3] === 'I') || ($this->query[$i + 3] === 'i'))
+                && (($this->query[$i + 4] === 'M') || ($this->query[$i + 4] === 'm'))
+                && (($this->query[$i + 5] === 'I') || ($this->query[$i + 5] === 'i'))
+                && (($this->query[$i + 6] === 'T') || ($this->query[$i + 6] === 't'))
+                && (($this->query[$i + 7] === 'E') || ($this->query[$i + 7] === 'e'))
+                && (($this->query[$i + 8] === 'R') || ($this->query[$i + 8] === 'r'))
+                && (Context::isWhitespace($this->query[$i + 9]))
+                && (($i === 0) || (Context::isWhitespace($this->query[$i - 1])))
+            ) {
+
+                // Saving the current index to be able to revert any parsing
+                // done in this block.
+                $iBak = $i;
+                $i += 9; // Skipping `DELIMITER`.
+
+                // Skipping whitespaces.
+                while (($i < $len) && (Context::isWhitespace($this->query[$i]))) {
+                    ++$i;
+                }
+
+                // Parsing the delimiter.
+                $delimiter = '';
+                while (($i < $len) && (!Context::isWhitespace($this->query[$i]))) {
+                    $delimiter .= $this->query[$i++];
+                }
+
+                // Checking if the delimiter definition ended.
+                if ((($i < $len) && (Context::isWhitespace($this->query[$i])))
+                    || (($i === $len) && ($end))
+                ) {
+
+                    // Saving the delimiter. An empty one (the buffer ended
+                    // right after the keyword) is ignored because a statement
+                    // cannot be terminated by an empty string.
+                    if ($delimiter !== '') {
+                        $this->setDelimiter($delimiter);
+                    }
+
+                    // Whether this statement should be returned or not.
+                    $ret = '';
+                    if (!empty($this->options['parse_delimiter'])) {
+
+                        // Appending the `DELIMITER` statement that was just
+                        // found to the current statement.
+                        $ret = trim(
+                            $this->current . ' ' . substr($this->query, $iBak, $i - $iBak)
+                        );
+                    }
+
+                    // Removing the statement that was just extracted from the
+                    // query.
+                    $this->query = substr($this->query, $i);
+                    $i = 0;
+
+                    // Resetting the current statement.
+                    $this->current = '';
+
+                    return $ret;
+                }
+
+                // Incomplete statement. Reverting and waiting for more data.
+                $i = $iBak;
+                return false;
+            }
+
+            /*
+             * Checking if the current statement finished.
+             *
+             * The first letter of the delimiter is being checked as an
+             * optimization. This code is almost as fast as the one above.
+             *
+             * There is no point in checking if two strings match if not even
+             * the first letter matches.
+             */
+            if (($this->query[$i] === $this->delimiter[0])
+                && (($this->delimiterLen === 1)
+                || (substr($this->query, $i, $this->delimiterLen) === $this->delimiter))
+            ) {
+
+                // Saving the statement that just ended.
+                $ret = $this->current;
+
+                // If needed, adds a delimiter at the end of the statement.
+                if (!empty($this->options['add_delimiter'])) {
+                    $ret .= $this->delimiter;
+                }
+
+                // Removing the statement that was just extracted from the
+                // query.
+                $this->query = substr($this->query, $i + $this->delimiterLen);
+                $i = 0;
+
+                // Resetting the current statement.
+                $this->current = '';
+
+                // Returning the statement.
+                return trim($ret);
+            }
+
+            /*
+             * Appending current character to current statement.
+             */
+            $this->current .= $this->query[$i];
+        }
+
+        if (($end) && ($i === $len)) {
+            // If the end of the buffer was reached, the buffer is emptied and
+            // the current statement that was extracted is returned. The
+            // parser state is reset as well, so nothing from this statement
+            // leaks into a later use of the same instance.
+            $ret = trim($this->current);
+            $this->query = '';
+            $this->current = '';
+            $this->status = 0;
+            $i = 0;
+            return $ret;
+        }
+
+        return false;
+    }
+}
diff --git a/src/Utils/Query.php b/src/Utils/Query.php
index 86325ed..4a9ea01 100644
--- a/src/Utils/Query.php
+++ b/src/Utils/Query.php
@@ -36,7 +36,7 @@ use SqlParser\Statements\UpdateStatement;
 /**
  * Statement utilities.
* - * @category Routines + * @category Statement * @package SqlParser * @subpackage Utils * @author Dan Ungureanu <udan1107@gmail.com> diff --git a/src/Utils/Table.php b/src/Utils/Table.php index c5ce2fe..81699ad 100644 --- a/src/Utils/Table.php +++ b/src/Utils/Table.php @@ -13,7 +13,7 @@ use SqlParser\Statements\CreateStatement; /** * Table utilities. * - * @category Tables + * @category Statement * @package SqlParser * @subpackage Utils * @author Dan Ungureanu <udan1107@gmail.com> |