diff options
Diffstat (limited to 'src/Lexer.php')
-rw-r--r-- | src/Lexer.php | 32 |
1 files changed, 29 insertions, 3 deletions
diff --git a/src/Lexer.php b/src/Lexer.php index d70641f..6e8c0fd 100644 --- a/src/Lexer.php +++ b/src/Lexer.php @@ -13,6 +13,16 @@ namespace SqlParser; use SqlParser\Exceptions\LexerException; +if (!defined('USE_UTF_STRINGS')) { + + /** + * Forces usage of `UtfString` if the string is multibyte. + * `UtfString` may be slower, but it gives better results. + * @var bool + */ + define('USE_UTF_STRINGS', true); +} + /** * Performs lexical analysis over a SQL statement and splits it in multiple * tokens. @@ -149,11 +159,27 @@ class Lexer */ public function __construct($str, $strict = false) { - $this->str = $str; - $this->len = ($str instanceof UtfString) ? - $str->length() : strlen($str); + // For multi-byte strings, a new instance of `UtfString` is + // initialized (only if `UtfString` usage is forced). + if (!($str instanceof UtfString)) { + $len = strlen($str); + if ((USE_UTF_STRINGS) && ($len != mb_strlen($str))) { + $str = new UtfString($str); + } + } + + if ($str instanceof UtfString) { + $this->str = $str; + $this->len = $str->length(); + } else { + $this->str = $str; + // `strlen` is used instead of `mb_strlen` because the lexer + // needs to parse each byte of the input. + $this->len = $len; + } $this->strict = $strict; + // Setting the delimiter. $this->delimiter = static::$DEFAULT_DELIMITER; $this->lex(); |