diff options
author | Ignace Nyamagana Butera <nyamsprod@gmail.com> | 2015-04-21 09:06:11 +0200 |
---|---|---|
committer | Ignace Nyamagana Butera <nyamsprod@gmail.com> | 2015-04-22 11:30:11 +0200 |
commit | 23b43fc9386bf007ca28d6e361e8d265647b35c9 (patch) | |
tree | 61bab2bf7bcf71ea7ddc34999eb5052a8b8b45d1 | |
parent | 6b385a6513d3018814ff4bdd036c4453a3613419 (diff) | |
download | csv-23b43fc9386bf007ca28d6e361e8d265647b35c9.zip csv-23b43fc9386bf007ca28d6e361e8d265647b35c9.tar.gz csv-23b43fc9386bf007ca28d6e361e8d265647b35c9.tar.bz2 |
adding QueryFilter::stripBom method
-rw-r--r-- | CHANGELOG.md | 6 | ||||
-rw-r--r-- | composer.json | 2 | ||||
-rw-r--r-- | src/Config/Output.php | 3 | ||||
-rw-r--r-- | src/Modifier/QueryFilter.php | 63 | ||||
-rw-r--r-- | src/Reader.php | 15 | ||||
-rw-r--r-- | test/ReaderTest.php | 57 |
6 files changed, 139 insertions, 7 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md index 3eb1161..172b953 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,12 @@ #Changelog All Notable changes to `League\Csv` will be documented in this file +## Next - XXXX-XX-XX + +### Added + +- `Reader::stripBOM` query filtering method to ease removing the BOM sequence when querying the CSV document. + ## 7.0.1 - 2015-03-23 ### Fixed diff --git a/composer.json b/composer.json index fc12e7e..04d2205 100644 --- a/composer.json +++ b/composer.json @@ -37,7 +37,7 @@ }, "extra": { "branch-alias": { - "dev-master": "7.0-dev" + "dev-master": "7.1-dev" } } } diff --git a/src/Config/Output.php b/src/Config/Output.php index 81b6434..6ba19ce 100644 --- a/src/Config/Output.php +++ b/src/Config/Output.php @@ -100,6 +100,7 @@ trait Output { if (empty($str)) { $this->output_bom = null; + return $this; } $str = (string) $str; @@ -187,7 +188,7 @@ trait Output $csv->rewind(); $csv->setFlags(SplFileObject::READ_CSV); if (! empty($bom) && ! empty($input_bom)) { - $csv->fseek(strlen($input_bom)); + $csv->fseek(mb_strlen($input_bom)); } echo $bom; $res = $csv->fpassthru(); diff --git a/src/Modifier/QueryFilter.php b/src/Modifier/QueryFilter.php index 4602d75..c37d430 100644 --- a/src/Modifier/QueryFilter.php +++ b/src/Modifier/QueryFilter.php @@ -56,6 +56,69 @@ trait QueryFilter protected $iterator_limit = -1; /** + * Stripping BOM status + * + * @var boolean + */ + protected $strip_bom = false; + + /** + * Stripping BOM setter + * + * @param bool $status + * + * @return $this + */ + public function stripBom($status) + { + $this->strip_bom = (bool) $status; + + return $this; + } + + /** + * Tell whether we can strip or not the leading BOM sequence + * + * @return boolean [description] + */ + protected function isBomStrippable() + { + $bom = $this->getInputBom(); + + return ! 
empty($bom) && $this->strip_bom; + } + + /** + * Remove the BOM sequence from the CSV + * + * @param Iterator $iterator + * + * @return Iterator + */ + protected function applyBomStripping(Iterator $iterator) + { + if (! $this->isBomStrippable()) { + return $iterator; + } + + $bom = $this->getInputBom(); + return new MapIterator($iterator, function ($row, $index) use ($bom) { + if (0 == $index) { + $row[0] = mb_substr($row[0], mb_strlen($bom)); + } + + return $row; + }); + } + + /** + * Returns the BOM sequence of the given CSV + * + * @return string + */ + abstract public function getInputBom(); + + /** * Set LimitIterator Offset * * @param $offset diff --git a/src/Reader.php b/src/Reader.php index 1f73476..4e764b5 100644 --- a/src/Reader.php +++ b/src/Reader.php @@ -45,7 +45,9 @@ class Reader extends AbstractCsv */ public function query(callable $callable = null) { - $iterator = new CallbackFilterIterator($this->getIterator(), function ($row) { + $iterator = $this->getIterator(); + $iterator = $this->applyBomStripping($iterator); + $iterator = new CallbackFilterIterator($iterator, function ($row) { return is_array($row); }); @@ -53,7 +55,7 @@ class Reader extends AbstractCsv $iterator = $this->applyIteratorSortBy($iterator); $iterator = $this->applyIteratorInterval($iterator); if (! 
is_null($callable)) { - $iterator = new Modifier\MapIterator($iterator, $callable); + return new Modifier\MapIterator($iterator, $callable); } return $iterator; @@ -65,11 +67,11 @@ class Reader extends AbstractCsv protected function getConversionIterator() { $iterator = $this->getIterator(); + $iterator = $this->applyBomStripping($iterator); $iterator = $this->applyIteratorFilter($iterator); $iterator = $this->applyIteratorSortBy($iterator); - $iterator = $this->applyIteratorInterval($iterator); - return $iterator; + return $this->applyIteratorInterval($iterator); } /** @@ -239,6 +241,11 @@ class Reader extends AbstractCsv throw new InvalidArgumentException('the specified row does not exist'); } + if (0 == $offset && $this->isBomStrippable()) { + $bom = $this->getInputBom(); + $res[0] = mb_substr($res[0], mb_strlen($bom)); + } + return $res; } diff --git a/test/ReaderTest.php b/test/ReaderTest.php index 6d0ed4a..e9efb83 100644 --- a/test/ReaderTest.php +++ b/test/ReaderTest.php @@ -193,7 +193,62 @@ class ReaderTest extends PHPUnit_Framework_TestCase } /** - * @expectedException InvalidArgumentException + * @param $expected + * @dataProvider validBOMSequences + */ + public function testStripBOM($expected, $res) + { + $tmpFile = new SplTempFileObject(); + foreach ($expected as $row) { + $tmpFile->fputcsv($row); + } + $csv = Reader::createFromFileObject($tmpFile); + $csv->setFlags(SplFileObject::READ_AHEAD|SplFileObject::SKIP_EMPTY); + $csv->stripBom(true); + + $this->assertSame($res, $csv->fetchAll()[0][0]); + } + + public function validBOMSequences() + { + return [ + 'withBOM' => [[ + [Reader::BOM_UTF16_LE.'john', 'doe', 'john.doe@example.com', ], + ['jane', 'doe', 'jane.doe@example.com', ], + ], 'john'], + 'withDoubleBOM' => [[ + [Reader::BOM_UTF16_LE.Reader::BOM_UTF16_LE.'john', 'doe', 'john.doe@example.com', ], + ['jane', 'doe', 'jane.doe@example.com', ], + ], Reader::BOM_UTF16_LE.'john'], + 'withoutBOM' => [[ + ['john', 'doe', 'john.doe@example.com', ], + 
['jane', 'doe', 'jane.doe@example.com', ], + ], 'john'], + ]; + } + + public function testStripBOMWithFetchAssoc() + { + $tmpFile = new SplTempFileObject(); + $expected = [ + [Reader::BOM_UTF16_LE.'john', 'doe', 'john.doe@example.com', ], + ['jane', 'doe', 'jane.doe@example.com', ], + ]; + + $tmpFile = new SplTempFileObject(); + foreach ($expected as $row) { + $tmpFile->fputcsv($row); + } + $csv = Reader::createFromFileObject($tmpFile); + $csv->setFlags(SplFileObject::READ_AHEAD|SplFileObject::SKIP_EMPTY); + $csv->stripBom(true); + $res = array_keys($csv->fetchAssoc()[0]); + + $this->assertSame('john', $res[0]); + } + + /** + * @expectedException \InvalidArgumentException * @expectedExceptionMessage Use a flat non empty array with unique string values */ public function testFetchAssocKeyFailure() |