diff options
-rw-r--r-- | CHANGELOG.md | 1 | ||||
-rw-r--r-- | src/UtfString.php | 6 | ||||
-rw-r--r-- | tests/Misc/UtfStringTest.php | 31 |
3 files changed, 37 insertions, 1 deletion
diff --git a/CHANGELOG.md b/CHANGELOG.md index 831b7a8..b87bfc7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,7 @@ ## [Unreleased] * Fixed parsing SQL comment at the end of query. +* Improved handling of non utf-8 strings. ## [4.1.7] - 2017-06-06 diff --git a/src/UtfString.php b/src/UtfString.php index e3dc512..0d2e0c5 100644 --- a/src/UtfString.php +++ b/src/UtfString.php @@ -75,7 +75,11 @@ class UtfString implements \ArrayAccess $this->byteIdx = 0; $this->charIdx = 0; $this->byteLen = mb_strlen($str, '8bit'); - $this->charLen = mb_strlen($str, 'UTF-8'); + if (! mb_check_encoding($str, 'UTF-8')) { + $this->charLen = 0; + } else { + $this->charLen = mb_strlen($str, 'UTF-8'); + } } /** diff --git a/tests/Misc/UtfStringTest.php b/tests/Misc/UtfStringTest.php index d7c4404..1e5222c 100644 --- a/tests/Misc/UtfStringTest.php +++ b/tests/Misc/UtfStringTest.php @@ -82,4 +82,35 @@ class UtfStringTest extends TestCase $str = new UtfString(static::TEST_PHRASE); $this->assertEquals(static::TEST_PHRASE, (string) $str); } + + /** + * Test access to string + * + * @dataProvider utf8_strings + */ + public function testAccess($text, $pos10, $pos20) + { + $str = new UtfString($text); + $this->assertEquals($pos10, $str->offsetGet(10)); + $this->assertEquals($pos20, $str->offsetGet(20)); + $this->assertEquals($pos10, $str->offsetGet(10)); + } + + public function utf8_strings() + { + return array( + 'ascii' => array( + 'abcdefghijklmnopqrstuvwxyz', 'k', 'u' + ), + 'unicode' => array( + 'áéíóúýěřťǔǐǒǎšďȟǰǩľžčǚň', 'ǐ', 'č' + ), + 'emoji' => array( + '😂😄😃😀😊😉😍😘😚😗😂👿😮😨😱😠😡😤😖😆😋👯', '😂', '😋' + ), + 'iso' => array( + "P\xf8\xed\xb9ern\xec \xbelu\xbbou\xe8k\xfd k\xf3d \xfap\xecl \xef\xe1belsk\xe9 k\xf3dy", null, null + ), + ); + } } |