summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDan Ungureanu <udan1107@gmail.com>2015-08-09 22:14:00 +0300
committerDan Ungureanu <udan1107@gmail.com>2015-08-09 22:14:00 +0300
commit0ccc3a271390b8491765960cc92575d7b96e05f1 (patch)
tree8bb7b4ee31187bfad3fd2cfecdc87c29f3db3fe8
parent77d5409b57f0ac82cf2ae71f339912c7f175c5cc (diff)
downloadsql-parser-0ccc3a271390b8491765960cc92575d7b96e05f1.zip
sql-parser-0ccc3a271390b8491765960cc92575d7b96e05f1.tar.gz
sql-parser-0ccc3a271390b8491765960cc92575d7b96e05f1.tar.bz2
Introduced the buffered query parser. This is used to extract statements from a
buffered string that may be parsed using the parser. DELIMITER is no longer reported as unexpected beginning of statement. Fixed typos.
-rw-r--r--src/Components/ExpressionArray.php4
-rw-r--r--src/Parser.php10
-rw-r--r--src/Utils/BufferedQuery.php390
-rw-r--r--src/Utils/Query.php2
-rw-r--r--src/Utils/Table.php2
5 files changed, 404 insertions, 4 deletions
diff --git a/src/Components/ExpressionArray.php b/src/Components/ExpressionArray.php
index f453154..309972a 100644
--- a/src/Components/ExpressionArray.php
+++ b/src/Components/ExpressionArray.php
@@ -1,7 +1,7 @@
<?php
/**
- * Parses a a list of expression delimited by a comma.
+ * Parses a list of expressions delimited by a comma.
*
* @package SqlParser
* @subpackage Components
@@ -14,7 +14,7 @@ use SqlParser\Token;
use SqlParser\TokensList;
/**
- * Parses a a list of expression delimited by a comma.
+ * Parses a list of expressions delimited by a comma.
*
* @category Keywords
* @package SqlParser
diff --git a/src/Parser.php b/src/Parser.php
index b44ecc6..ac92aaf 100644
--- a/src/Parser.php
+++ b/src/Parser.php
@@ -368,6 +368,16 @@ namespace SqlParser {
*/
$token = $list->tokens[$list->idx];
+ // `DELIMITER` is not an actual statement and it requires
+ // special handling.
+ if (($token->type === Token::TYPE_NONE)
+ && (strtoupper($token->token) === 'DELIMITER')
+ ) {
+ // Skipping to the end of this statement.
+ $list->getNextOfType(Token::TYPE_DELIMITER);
+ continue;
+ }
+
// Statements can start with keywords only.
// Comments, whitespaces, etc. are ignored.
if ($token->type !== Token::TYPE_KEYWORD) {
diff --git a/src/Utils/BufferedQuery.php b/src/Utils/BufferedQuery.php
new file mode 100644
index 0000000..dc2bdcb
--- /dev/null
+++ b/src/Utils/BufferedQuery.php
@@ -0,0 +1,390 @@
+<?php
+
+/**
+ * Buffered query utilities.
+ *
+ * @package SqlParser
+ * @subpackage Utils
+ */
+namespace SqlParser\Utils;
+
+use SqlParser\Context;
+
+/**
+ * Buffered query utilities.
+ *
+ * Implements a specialized lexer used to extract statements from large inputs
+ * that are being buffered. After each statement has been extracted, a lexer or
+ * a parser may be used.
+ *
+ * All comments are skipped, with one exception: MySQL commands inside `/*!`.
+ *
+ * @category Lexer
+ * @package SqlParser
+ * @subpackage Utils
+ * @author Dan Ungureanu <udan1107@gmail.com>
+ * @license http://opensource.org/licenses/GPL-2.0 GNU Public License
+ */
+class BufferedQuery
+{
+
+    // Constants that describe the current status of the parser. A status of
+    // `0` means that the parser is outside of any string or comment.
+    const STATUS_STRING_SINGLE_QUOTES = 1;
+    const STATUS_STRING_DOUBLE_QUOTES = 2;
+    const STATUS_STRING_BACKTICK = 3;
+    const STATUS_COMMENT_BASH = 4;
+    const STATUS_COMMENT_C = 5;
+    const STATUS_COMMENT_SQL = 6;
+
+    /**
+     * The query that is being processed.
+     *
+     * This field can be modified just by appending to it! The parser
+     * remembers how far into this buffer it got, so the part that was
+     * already processed must stay untouched.
+     *
+     * @var string
+     */
+    public $query = '';
+
+    /**
+     * The options of this parser.
+     *
+     * @var array
+     */
+    public $options = array();
+
+    /**
+     * The last delimiter used.
+     *
+     * @var string
+     */
+    public $delimiter;
+
+    /**
+     * The length of the delimiter.
+     *
+     * @var int
+     */
+    public $delimiterLen;
+
+    /**
+     * The current status of the parser (one of the `STATUS_*` constants or
+     * `0` when outside of strings and comments).
+     *
+     * @var int
+     */
+    public $status = 0;
+
+    /**
+     * The last incomplete query that was extracted.
+     *
+     * @var string
+     */
+    public $current = '';
+
+    /**
+     * The index in `$query` of the next character to be processed.
+     *
+     * This is stored per instance (and not in a function-level `static`
+     * variable, which PHP shares between all objects of the class) so that
+     * several buffers can be processed independently of each other.
+     *
+     * @var int
+     */
+    protected $position = 0;
+
+    /**
+     * Constructor.
+     *
+     * Recognized options:
+     *  - `delimiter` (string, default `;`): the starting delimiter;
+     *  - `parse_delimiter` (bool, default `false`): whether `DELIMITER`
+     *    statements should be returned by `extract()`;
+     *  - `add_delimiter` (bool, default `false`): whether the delimiter
+     *    should be added at the end of each extracted statement.
+     *
+     * @param string $query   The query to be parsed.
+     * @param array  $options The options of this parser.
+     */
+    public function __construct($query = '', array $options = array())
+    {
+        // Merges specified options with defaults.
+        $this->options = array_merge(
+            array(
+                'delimiter' => ';',
+                'parse_delimiter' => false,
+                'add_delimiter' => false,
+            ),
+            $options
+        );
+
+        // The initial buffer is the query received as argument.
+        $this->query = $query;
+        $this->setDelimiter($this->options['delimiter']);
+    }
+
+    /**
+     * Sets the delimiter.
+     *
+     * Used to update the length of it too.
+     *
+     * @param string $delimiter The new delimiter.
+     *
+     * @return void
+     */
+    public function setDelimiter($delimiter)
+    {
+        $this->delimiter = $delimiter;
+        $this->delimiterLen = strlen($delimiter);
+    }
+
+    /**
+     * Extracts a statement from the buffer.
+     *
+     * The extracted statement (and everything that precedes it) is removed
+     * from the buffer. Comments are dropped, except for `/*!` blocks; a
+     * dropped comment leaves a whitespace behind so the tokens around it
+     * are not glued together.
+     *
+     * @param bool $end Whether the end of the buffer was reached.
+     *
+     * @return string|false The statement that was extracted (an empty string
+     *                      for a `DELIMITER` statement when the option
+     *                      `parse_delimiter` is disabled) or `false` if more
+     *                      data is required or the buffer is empty.
+     */
+    public function extract($end = false)
+    {
+        // Local alias of the persistent position: every change made to
+        // `$i` below is remembered for the next call.
+        $i = &$this->position;
+
+        /**
+         * The length of the buffer.
+         * @var int $len
+         */
+        $len = strlen($this->query);
+
+        // The length is compared instead of using `empty()` because the
+        // latter considers the valid buffer `0` to be empty.
+        if ($len === 0) {
+            return false;
+        }
+
+        /**
+         * The last index of the string that is going to be parsed.
+         *
+         * There must be a few characters left in the buffer so the parser can
+         * avoid confusing some symbols that may have multiple meanings.
+         *
+         * For example, if the buffer ends in `-` that may be an operator or
+         * the beginning of a comment.
+         *
+         * Another example is a buffer that ends in `DELIMITE`. The parser is
+         * going to require a few more characters because that may be a part
+         * of the `DELIMITER` keyword or just a column named `DELIMITE`.
+         *
+         * Those extra characters are required only if there is more data
+         * expected (the end of the buffer was not reached).
+         *
+         * @var int $loopLen
+         */
+        $loopLen = $end ? $len : $len - 16;
+
+        for (; $i < $loopLen; ++$i) {
+            $char = $this->query[$i];
+
+            /*
+             * Handling special parser statuses.
+             */
+            if (($this->status === static::STATUS_STRING_SINGLE_QUOTES)
+                || ($this->status === static::STATUS_STRING_DOUBLE_QUOTES)
+            ) {
+                // Quoted strings like 'foo' or "bar".
+                $quote = ($this->status === static::STATUS_STRING_SINGLE_QUOTES)
+                    ? '\'' : '"';
+
+                if (($char === '\\') && ($i + 1 < $len)) {
+                    // A backslash escapes the next character, whatever it
+                    // is. Both are consumed at once so that an escaped
+                    // backslash followed by a quote (`'a\\'`) properly ends
+                    // the string.
+                    $this->current .= $char;
+                    $char = $this->query[++$i];
+                } elseif ($char === $quote) {
+                    $this->status = 0;
+                }
+                $this->current .= $char;
+                continue;
+            } elseif ($this->status === static::STATUS_STRING_BACKTICK) {
+                // Quoted identifiers like `baz`.
+                if ($char === '`') {
+                    $this->status = 0;
+                }
+                $this->current .= $char;
+                continue;
+            } elseif (($this->status === static::STATUS_COMMENT_BASH)
+                || ($this->status === static::STATUS_COMMENT_SQL)
+            ) {
+                // Bash-like (#) or SQL-like (-- ) comments end in new line.
+                if ($char === "\n") {
+                    $this->status = 0;
+
+                    // The line break is kept, otherwise the tokens before
+                    // and after the comment would be concatenated.
+                    $this->current .= $char;
+                }
+                continue;
+            } elseif ($this->status === static::STATUS_COMMENT_C) {
+                // C-like comments end in */.
+                if (($char === '*')
+                    && ($i + 1 < $len)
+                    && ($this->query[$i + 1] === '/')
+                ) {
+                    ++$i; // Skipping `/`.
+                    $this->status = 0;
+
+                    // A comment acts as a whitespace between tokens.
+                    $this->current .= ' ';
+                }
+                continue;
+            }
+
+            /*
+             * Checking if a string started.
+             */
+            if ($char === '\'') {
+                $this->status = static::STATUS_STRING_SINGLE_QUOTES;
+                $this->current .= $char;
+                continue;
+            } elseif ($char === '"') {
+                $this->status = static::STATUS_STRING_DOUBLE_QUOTES;
+                $this->current .= $char;
+                continue;
+            } elseif ($char === '`') {
+                $this->status = static::STATUS_STRING_BACKTICK;
+                $this->current .= $char;
+                continue;
+            }
+
+            /*
+             * Checking if a comment started.
+             */
+            if ($char === '#') {
+                $this->status = static::STATUS_COMMENT_BASH;
+                continue;
+            } elseif (($i + 2 < $len)
+                && ($char === '-')
+                && ($this->query[$i + 1] === '-')
+                && (Context::isWhitespace($this->query[$i + 2]))
+            ) {
+                $this->status = static::STATUS_COMMENT_SQL;
+                continue;
+            } elseif (($i + 2 < $len)
+                && ($char === '/')
+                && ($this->query[$i + 1] === '*')
+                && ($this->query[$i + 2] !== '!')
+            ) {
+                // The opening `*` is skipped so it cannot be mistaken for
+                // the beginning of the closing `*/` (as in `/*/`).
+                ++$i;
+                $this->status = static::STATUS_COMMENT_C;
+                continue;
+            }
+
+            /*
+             * Handling `DELIMITER` statement.
+             *
+             * The keyword must fit in the buffer and must be followed by a
+             * whitespace; otherwise this is just a part of another word
+             * (for example `delimiter_id`) and it is handled as regular
+             * text. The first letter is checked before comparing the whole
+             * keyword as an optimization.
+             */
+            if (($i + 9 < $len)
+                && (($char === 'D') || ($char === 'd'))
+                && (substr_compare($this->query, 'DELIMITER', $i, 9, true) === 0)
+                && (Context::isWhitespace($this->query[$i + 9]))
+            ) {
+
+                // Saving the current index to be able to revert any parsing
+                // done in this block.
+                $iBak = $i;
+                $i += 9; // Skipping `DELIMITER`.
+
+                // Skipping whitespaces.
+                while (($i < $len) && (Context::isWhitespace($this->query[$i]))) {
+                    ++$i;
+                }
+
+                // Parsing the delimiter.
+                $delimiter = '';
+                while (($i < $len) && (!Context::isWhitespace($this->query[$i]))) {
+                    $delimiter .= $this->query[$i++];
+                }
+
+                // If the buffer ended before a whitespace was found and more
+                // data is expected, the delimiter may continue in the next
+                // chunk. Reverting and waiting for more data.
+                if (($i === $len) && (!$end)) {
+                    $i = $iBak;
+                    return false;
+                }
+
+                // Saving the delimiter. It may be empty only if the buffer
+                // ended right after the keyword; in that case the previous
+                // delimiter is kept.
+                if ($delimiter !== '') {
+                    $this->setDelimiter($delimiter);
+                }
+
+                // Whether this statement should be returned or not.
+                $ret = '';
+                if (!empty($this->options['parse_delimiter'])) {
+
+                    // Appending the `DELIMITER` statement that was just
+                    // found to the current statement.
+                    $ret = trim(
+                        $this->current . ' ' . substr($this->query, $iBak, $i - $iBak)
+                    );
+                }
+
+                // Removing the statement that was just extracted from the
+                // query. The cast is required because older versions of PHP
+                // return `false` when nothing is left.
+                $this->query = (string) substr($this->query, $i);
+                $i = 0;
+
+                // Resetting the current statement.
+                $this->current = '';
+
+                return $ret;
+            }
+
+            /*
+             * Checking if the current statement finished.
+             *
+             * The first letter of the delimiter is being checked as an
+             * optimization: there is no point in checking if two strings
+             * match if not even the first letter matches.
+             */
+            if (($this->delimiterLen > 0)
+                && ($char === $this->delimiter[0])
+                && (($this->delimiterLen === 1)
+                || (substr($this->query, $i, $this->delimiterLen) === $this->delimiter))
+            ) {
+
+                // Saving the statement that just ended.
+                $ret = $this->current;
+
+                // If needed, adds a delimiter at the end of the statement.
+                if (!empty($this->options['add_delimiter'])) {
+                    $ret .= $this->delimiter;
+                }
+
+                // Removing the statement that was just extracted from the
+                // query.
+                $this->query = (string) substr($this->query, $i + $this->delimiterLen);
+                $i = 0;
+
+                // Resetting the current statement.
+                $this->current = '';
+
+                // Returning the statement.
+                return trim($ret);
+            }
+
+            /*
+             * Appending current character to current statement.
+             */
+            $this->current .= $char;
+        }
+
+        if (($end) && ($i === $len)) {
+            // If the end of the buffer was reached, the buffer is emptied and
+            // the current statement that was extracted is returned. The
+            // state is reset so nothing leaks into a later use of this
+            // object.
+            $ret = trim($this->current);
+            $this->query = '';
+            $this->current = '';
+            $this->status = 0;
+            $i = 0;
+            return $ret;
+        }
+
+        return false;
+    }
+}
diff --git a/src/Utils/Query.php b/src/Utils/Query.php
index 86325ed..4a9ea01 100644
--- a/src/Utils/Query.php
+++ b/src/Utils/Query.php
@@ -36,7 +36,7 @@ use SqlParser\Statements\UpdateStatement;
/**
* Statement utilities.
*
- * @category Routines
+ * @category Statement
* @package SqlParser
* @subpackage Utils
* @author Dan Ungureanu <udan1107@gmail.com>
diff --git a/src/Utils/Table.php b/src/Utils/Table.php
index c5ce2fe..81699ad 100644
--- a/src/Utils/Table.php
+++ b/src/Utils/Table.php
@@ -13,7 +13,7 @@ use SqlParser\Statements\CreateStatement;
/**
* Table utilities.
*
- * @category Tables
+ * @category Statement
* @package SqlParser
* @subpackage Utils
* @author Dan Ungureanu <udan1107@gmail.com>