summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorDan Ungureanu <udan1107@gmail.com>2015-08-02 22:04:05 +0300
committerDan Ungureanu <udan1107@gmail.com>2015-08-02 22:04:05 +0300
commit9130cca0d17169b4647d39b644cc6483b600623c (patch)
treeaab84450559ba2651f31d92648ff9549f25eed5e /src
parente6a562e3e5bfbbbc03d469e50d65d063cac9ebd4 (diff)
downloadsql-parser-9130cca0d17169b4647d39b644cc6483b600623c.zip
sql-parser-9130cca0d17169b4647d39b644cc6483b600623c.tar.gz
sql-parser-9130cca0d17169b4647d39b644cc6483b600623c.tar.bz2
Added utility to get first full statement from a buffer.
Misc coding style fixes.
Diffstat (limited to 'src')
-rw-r--r--src/Lexer.php92
-rw-r--r--src/Utils/Query.php63
2 files changed, 117 insertions, 38 deletions
diff --git a/src/Lexer.php b/src/Lexer.php
index 697218c..ba4f6ef 100644
--- a/src/Lexer.php
+++ b/src/Lexer.php
@@ -63,37 +63,38 @@ namespace SqlParser {
{
/**
- * A list of methods that are used in lexing the SQL query.
- *
- * @var array
- */
+ * A list of methods that are used in lexing the SQL query.
+ *
+ * @var array
+ */
public static $PARSER_METHODS = array(
- // It is best to put the parsers in order of their complexity
- // (ascending) and their occurrence rate (descending).
- //
- // Conflicts:
- //
- // 1. `parseDelimiter` and `parseUnknown`, `parseKeyword`, `parseNumber`
- // They fight over delimiter. The delimiter may be a keyword, a number
- // or almost any character which makes the delimiter one of the first
- // tokens that must be parsed.
- //
- // 1. `parseNumber` and `parseOperator`
- // They fight over `+` and `-`.
- //
- // 2. `parseComment` and `parseOperator`
- // They fight over `/` (as in ```/*comment*/``` or ```a / b```)
- //
- // 3. `parseBool` and `parseKeyword`
- // They fight over `TRUE` and `FALSE`.
- //
- // 4. `parseKeyword` and `parseUnknown`
- // They fight over words. `parseUnknown` does not know about keywords.
-
- 'parseDelimiter', 'parseWhitespace', 'parseNumber', 'parseComment',
- 'parseOperator', 'parseBool', 'parseString', 'parseSymbol',
- 'parseKeyword', 'parseUnknown'
+ // It is best to put the parsers in order of their complexity
+ // (ascending) and their occurrence rate (descending).
+ //
+ // Conflicts:
+ //
+ // 1. `parseDelimiter`, `parseUnknown`, `parseKeyword`, `parseNumber`
+ // They fight over delimiter. The delimiter may be a keyword, a
+ // number or almost any character which makes the delimiter one of
+ // the first tokens that must be parsed.
+ //
+ // 2. `parseNumber` and `parseOperator`
+ // They fight over `+` and `-`.
+ //
+ // 3. `parseComment` and `parseOperator`
+ // They fight over `/` (as in ```/*comment*/``` or ```a / b```)
+ //
+ // 4. `parseBool` and `parseKeyword`
+ // They fight over `TRUE` and `FALSE`.
+ //
+ // 5. `parseKeyword` and `parseUnknown`
+ // They fight over words. `parseUnknown` does not know about
+ // keywords.
+
+ 'parseDelimiter', 'parseWhitespace', 'parseNumber',
+ 'parseComment', 'parseOperator', 'parseBool', 'parseString',
+ 'parseSymbol', 'parseKeyword', 'parseUnknown'
);
/**
@@ -149,7 +150,7 @@ namespace SqlParser {
*
* @var string
*/
- public $delimiter = ';';
+ public $delimiter;
/**
* The length of the delimiter.
@@ -160,7 +161,7 @@ namespace SqlParser {
*
* @var int
*/
- public $delimiterLen = 1;
+ public $delimiterLen;
/**
* List of errors that occurred during lexing.
@@ -178,10 +179,12 @@ namespace SqlParser {
/**
* Constructor.
*
- * @param string|UtfString $str The query to be lexed.
- * @param bool $strict Whether strict mode should be enabled or not.
+ * @param string|UtfString $str The query to be lexed.
+ * @param bool $strict Whether strict mode should be
+ * enabled or not.
+ * @param string $delimiter The delimiter to be used.
*/
- public function __construct($str, $strict = false)
+ public function __construct($str, $strict = false, $delimiter = null)
{
// `strlen` is used instead of `mb_strlen` because the lexer needs to
// parse each byte of the input.
@@ -201,12 +204,25 @@ namespace SqlParser {
$this->strict = $strict;
// Setting the delimiter.
- $this->delimiter = static::$DEFAULT_DELIMITER;
+ $this->setDelimiter(
+ !empty($delimiter) ? $delimiter : static::$DEFAULT_DELIMITER
+ );
$this->lex();
}
/**
+ * Sets the delimiter.
+ *
+ * @param string $delimiter The new delimiter.
+ */
+    public function setDelimiter($delimiter)
+    {
+        // Store the delimiter first, then derive its length from the stored
+        // value so that both properties always describe the same string.
+        // `strlen` counts bytes, not characters.
+        $this->delimiter = $delimiter;
+        $this->delimiterLen = strlen($this->delimiter);
+    }
+
+ /**
* Parses the string and extracts lexemes.
*
* @return void
@@ -521,9 +537,9 @@ namespace SqlParser {
while ((++$this->last < $this->len) && ($this->str[$this->last] !== "\n")) {
$token .= $this->str[$this->last];
}
- if ($this->last < $this->len) {
- $token .= $this->str[$this->last];
- }
+
+ // Adding the line ending.
+ $token .= "\n";
}
return new Token($token, Token::TYPE_COMMENT, Token::FLAG_COMMENT_SQL);
}
diff --git a/src/Utils/Query.php b/src/Utils/Query.php
index c11e858..d8e9500 100644
--- a/src/Utils/Query.php
+++ b/src/Utils/Query.php
@@ -587,6 +587,10 @@ class Query
for ($i = $statement->first; $i <= $statement->last; ++$i) {
$token = $list->tokens[$i];
+ if ($token->type === Token::TYPE_COMMENT) {
+ continue;
+ }
+
if ($token->type === Token::TYPE_OPERATOR) {
if ($token->value === '(') {
++$brackets;
@@ -708,4 +712,63 @@ class Query
return $ret;
}
+
+    /**
+     * Gets the first full statement in the query.
+     *
+     * The query is lexed and its tokens are concatenated until the first
+     * non-empty delimiter is reached. Comment tokens met before that point
+     * are left out of the returned statement.
+     *
+     * @param string      $query     The query to be analyzed.
+     * @param string|null $delimiter The delimiter to be used. When empty, the
+     *                               lexer falls back to its default delimiter.
+     *
+     * @return array Array containing the first full query (`null` when no
+     *               full statement was found), the remaining part of the
+     *               query and the last delimiter.
+     */
+    public static function getFirstStatement($query, $delimiter = null)
+    {
+        $lexer = new Lexer($query, false, $delimiter);
+        $list = $lexer->list;
+
+        /**
+         * Whether a full statement was found.
+         * @var bool
+         */
+        $fullStatement = false;
+
+        /**
+         * The first full statement.
+         * @var string
+         */
+        $statement = '';
+
+        for ($list->idx = 0; $list->idx < $list->count; ++$list->idx) {
+            $token = $list->tokens[$list->idx];
+
+            // Comments are not part of the extracted statement.
+            if ($token->type === Token::TYPE_COMMENT) {
+                continue;
+            }
+
+            $statement .= $token->token;
+
+            // Only a delimiter with a non-empty value ends a statement.
+            if (($token->type === Token::TYPE_DELIMITER) && (!empty($token->value))) {
+                $delimiter = $token->value;
+                $fullStatement = true;
+                break;
+            }
+        }
+
+        // No statement was found so we return the entire query as being the
+        // remaining part.
+        if (!$fullStatement) {
+            return array(null, $query, $delimiter);
+        }
+
+        // At least one query was found so we have to build the rest of the
+        // remaining query. It is rebuilt from the same `token` field used for
+        // the first statement above, so that both parts are taken from the
+        // same representation of the input.
+        // NOTE(review): assumes `token` holds the text as it appeared in the
+        // query and `value` a processed form of it - confirm against Token.
+        $query = '';
+        for (++$list->idx; $list->idx < $list->count; ++$list->idx) {
+            $query .= $list->tokens[$list->idx]->token;
+        }
+
+        return array(trim($statement), $query, $delimiter);
+    }
}