summary | refs | log | tree | commit | diff | stats
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r-- src/Components/OptionsArray.php 4
-rw-r--r-- src/Context.php 4
-rw-r--r-- src/Lexer.php 32
-rw-r--r-- src/UtfString.php 43
4 files changed, 40 insertions, 43 deletions
diff --git a/src/Components/OptionsArray.php b/src/Components/OptionsArray.php
index a6b612d..487eefd 100644
--- a/src/Components/OptionsArray.php
+++ b/src/Components/OptionsArray.php
@@ -201,8 +201,8 @@ class OptionsArray extends Component
$list,
empty($lastOption[2]) ? array() : $lastOption[2]
);
- $ret->options[$lastOptionId]['value'] =
- $ret->options[$lastOptionId]['expr']->expr;
+ $ret->options[$lastOptionId]['value']
+ = $ret->options[$lastOptionId]['expr']->expr;
$lastOption = null;
$state = 0;
} else {
diff --git a/src/Context.php b/src/Context.php
index efcb8df..dd912fb 100644
--- a/src/Context.php
+++ b/src/Context.php
@@ -397,7 +397,9 @@ abstract class Context
*/
public static function isSeparator($str)
{
- return !ctype_alnum($str) && $str !== '_';
+ // NOTES: Only ASCII characters may be separators.
+ // `~` is the last printable ASCII character.
+ return ($str <= '~') && (!ctype_alnum($str)) && ($str !== '_');
}
/**
diff --git a/src/Lexer.php b/src/Lexer.php
index d70641f..6e8c0fd 100644
--- a/src/Lexer.php
+++ b/src/Lexer.php
@@ -13,6 +13,16 @@ namespace SqlParser;
use SqlParser\Exceptions\LexerException;
+if (!defined('USE_UTF_STRINGS')) {
+
+ /**
+ * Forces usage of `UtfString` if the string is multibyte.
+ * `UtfString` may be slower, but it gives better results.
+ * @var bool
+ */
+ define('USE_UTF_STRINGS', true);
+}
+
/**
* Performs lexical analysis over a SQL statement and splits it in multiple
* tokens.
@@ -149,11 +159,27 @@ class Lexer
*/
public function __construct($str, $strict = false)
{
- $this->str = $str;
- $this->len = ($str instanceof UtfString) ?
- $str->length() : strlen($str);
+ // For multi-byte strings, a new instance of `UtfString` is
+ // initialized (only if `UtfString` usage is forced).
+ if (!($str instanceof UtfString)) {
+ $len = strlen($str);
+ if ((USE_UTF_STRINGS) && ($len != mb_strlen($str))) {
+ $str = new UtfString($str);
+ }
+ }
+
+ if ($str instanceof UtfString) {
+ $this->str = $str;
+ $this->len = $str->length();
+ } else {
+ $this->str = $str;
+ // `strlen` is used instead of `mb_strlen` because the lexer
+ // needs to parse each byte of the input.
+ $this->len = $len;
+ }
$this->strict = $strict;
+ // Setting the delimiter.
$this->delimiter = static::$DEFAULT_DELIMITER;
$this->lex();
diff --git a/src/UtfString.php b/src/UtfString.php
index 27e3f93..1e863a4 100644
--- a/src/UtfString.php
+++ b/src/UtfString.php
@@ -93,7 +93,7 @@ class UtfString implements \ArrayAccess
*/
public function offsetExists($offset)
{
- return $offset < $this->charLen;
+ return ($offset >= 0) && ($offset < $this->charLen);
}
/**
@@ -190,26 +190,13 @@ class UtfString implements \ArrayAccess
return 3;
} elseif ($byte < 248) {
return 4;
- } elseif ($byte === 252) {
+ } elseif ($byte < 252) {
return 5; // unofficial
}
return 6; // unofficial
}
/**
- * Returns the number of remaining characters.
- *
- * @return int
- */
- public function remaining()
- {
- if ($this->charIdx < $this->charLen) {
- return $this->charLen - $this->charIdx;
- }
- return 0;
- }
-
- /**
* Returns the length in characters of the string.
*
* @return int
@@ -220,30 +207,12 @@ class UtfString implements \ArrayAccess
}
/**
- * Gets the values of the indexes.
- *
- * @param int &$byte Reference to the byte index.
- * @param int &$char Reference to the character index.
- *
- * @return void
- */
- public function getIndexes(&$byte, &$char)
- {
- $byte = $this->byteIdx;
- $char = $this->charIdx;
- }
-
- /**
- * Sets the values of the indexes.
+ * Returns the contained string.
*
- * @param int $byte The byte index.
- * @param int $char The character index.
- *
- * @return void
+ * @return string
*/
- public function setIndexes($byte = 0, $char = 0)
+ public function __toString()
{
- $this->byteIdx = $byte;
- $this->charIdx = $char;
+ return $this->str;
}
}