summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--CHANGELOG.md1
-rw-r--r--src/UtfString.php6
-rw-r--r--tests/Misc/UtfStringTest.php31
3 files changed, 37 insertions, 1 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 831b7a8..b87bfc7 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -3,6 +3,7 @@
## [Unreleased]
* Fixed parsing SQL comment at the end of query.
+* Improved handling of non-UTF-8 strings.
## [4.1.7] - 2017-06-06
diff --git a/src/UtfString.php b/src/UtfString.php
index e3dc512..0d2e0c5 100644
--- a/src/UtfString.php
+++ b/src/UtfString.php
@@ -75,7 +75,11 @@ class UtfString implements \ArrayAccess
$this->byteIdx = 0;
$this->charIdx = 0;
$this->byteLen = mb_strlen($str, '8bit');
- $this->charLen = mb_strlen($str, 'UTF-8');
+ if (! mb_check_encoding($str, 'UTF-8')) {
+ $this->charLen = 0;
+ } else {
+ $this->charLen = mb_strlen($str, 'UTF-8');
+ }
}
/**
diff --git a/tests/Misc/UtfStringTest.php b/tests/Misc/UtfStringTest.php
index d7c4404..1e5222c 100644
--- a/tests/Misc/UtfStringTest.php
+++ b/tests/Misc/UtfStringTest.php
@@ -82,4 +82,35 @@ class UtfStringTest extends TestCase
$str = new UtfString(static::TEST_PHRASE);
$this->assertEquals(static::TEST_PHRASE, (string) $str);
}
+
+ /**
+ * Test access to string
+ *
+ * @dataProvider utf8_strings
+ */
+ public function testAccess($text, $pos10, $pos20)
+ {
+ $str = new UtfString($text);
+ $this->assertEquals($pos10, $str->offsetGet(10));
+ $this->assertEquals($pos20, $str->offsetGet(20));
+ $this->assertEquals($pos10, $str->offsetGet(10));
+ }
+
+ public function utf8_strings()
+ {
+ return array(
+ 'ascii' => array(
+ 'abcdefghijklmnopqrstuvwxyz', 'k', 'u'
+ ),
+ 'unicode' => array(
+ 'áéíóúýěřťǔǐǒǎšďȟǰǩľžčǚň', 'ǐ', 'č'
+ ),
+ 'emoji' => array(
+ '😂😄😃😀😊😉😍😘😚😗😂👿😮😨😱😠😡😤😖😆😋👯', '😂', '😋'
+ ),
+ 'iso' => array(
+ "P\xf8\xed\xb9ern\xec \xbelu\xbbou\xe8k\xfd k\xf3d \xfap\xecl \xef\xe1belsk\xe9 k\xf3dy", null, null
+ ),
+ );
+ }
}