summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorIgnace Nyamagana Butera <nyamsprod@gmail.com>2015-04-21 09:06:11 +0200
committerIgnace Nyamagana Butera <nyamsprod@gmail.com>2015-04-22 11:30:11 +0200
commit23b43fc9386bf007ca28d6e361e8d265647b35c9 (patch)
tree61bab2bf7bcf71ea7ddc34999eb5052a8b8b45d1
parent6b385a6513d3018814ff4bdd036c4453a3613419 (diff)
downloadcsv-23b43fc9386bf007ca28d6e361e8d265647b35c9.zip
csv-23b43fc9386bf007ca28d6e361e8d265647b35c9.tar.gz
csv-23b43fc9386bf007ca28d6e361e8d265647b35c9.tar.bz2
adding QueryFilter::stripBom method
-rw-r--r--CHANGELOG.md6
-rw-r--r--composer.json2
-rw-r--r--src/Config/Output.php3
-rw-r--r--src/Modifier/QueryFilter.php63
-rw-r--r--src/Reader.php15
-rw-r--r--test/ReaderTest.php57
6 files changed, 139 insertions, 7 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 3eb1161..172b953 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,6 +1,12 @@
#Changelog
All Notable changes to `League\Csv` will be documented in this file
+## Next - XXXX-XX-XX
+
+### Added
+
+- `Reader::stripBom` query filtering method to ease removing the BOM sequence when querying the CSV document.
+
## 7.0.1 - 2015-03-23
### Fixed
diff --git a/composer.json b/composer.json
index fc12e7e..04d2205 100644
--- a/composer.json
+++ b/composer.json
@@ -37,7 +37,7 @@
},
"extra": {
"branch-alias": {
- "dev-master": "7.0-dev"
+ "dev-master": "7.1-dev"
}
}
}
diff --git a/src/Config/Output.php b/src/Config/Output.php
index 81b6434..6ba19ce 100644
--- a/src/Config/Output.php
+++ b/src/Config/Output.php
@@ -100,6 +100,7 @@ trait Output
{
if (empty($str)) {
$this->output_bom = null;
+
return $this;
}
$str = (string) $str;
@@ -187,7 +188,7 @@ trait Output
$csv->rewind();
$csv->setFlags(SplFileObject::READ_CSV);
if (! empty($bom) && ! empty($input_bom)) {
- $csv->fseek(strlen($input_bom));
+ $csv->fseek(mb_strlen($input_bom));
}
echo $bom;
$res = $csv->fpassthru();
diff --git a/src/Modifier/QueryFilter.php b/src/Modifier/QueryFilter.php
index 4602d75..c37d430 100644
--- a/src/Modifier/QueryFilter.php
+++ b/src/Modifier/QueryFilter.php
@@ -56,6 +56,69 @@ trait QueryFilter
protected $iterator_limit = -1;
/**
+ * Whether the leading BOM sequence should be stripped from the first
+ * cell of the first row (disabled by default)
+ *
+ * @var bool
+ */
+ protected $strip_bom = false;
+
+ /**
+ * Enable or disable the stripping of the leading BOM sequence
+ *
+ * The given value is cast to a boolean and stored; the stripping itself
+ * happens later, and only when isBomStrippable() allows it.
+ *
+ * @param bool $status true to strip the BOM sequence, false to keep it
+ *
+ * @return $this
+ */
+ public function stripBom($status)
+ {
+ $this->strip_bom = (bool) $status;
+
+ return $this;
+ }
+
+ /**
+ * Tell whether the leading BOM sequence can be stripped
+ *
+ * Stripping is possible only when the CSV has a non-empty input BOM
+ * and BOM stripping has been enabled with stripBom().
+ *
+ * @return bool
+ */
+ protected function isBomStrippable()
+ {
+ $bom = $this->getInputBom();
+
+ return ! empty($bom) && $this->strip_bom;
+ }
+
+    /**
+     * Remove the BOM sequence from the CSV
+     *
+     * When stripping is enabled and the CSV has an input BOM, the iterator is
+     * wrapped so that the BOM is removed from the first cell of the row found
+     * at index 0. Otherwise the iterator is returned untouched.
+     *
+     * @param Iterator $iterator
+     *
+     * @return Iterator
+     */
+    protected function applyBomStripping(Iterator $iterator)
+    {
+        if (! $this->isBomStrippable()) {
+            return $iterator;
+        }
+
+        // A BOM is a fixed sequence of bytes: measure and cut it byte-wise so
+        // the result does not depend on the mbstring internal encoding.
+        $bom_length = strlen($this->getInputBom());
+
+        return new MapIterator($iterator, function ($row, $index) use ($bom_length) {
+            // Rows are only filtered with is_array() after this step in
+            // Reader::query(), so anything that is not a row is left untouched.
+            if (0 == $index && is_array($row) && isset($row[0])) {
+                // The cast keeps an empty string when the cell holds nothing
+                // but the BOM (substr() may return false on older PHP versions).
+                $row[0] = (string) substr($row[0], $bom_length);
+            }
+
+            return $row;
+        });
+    }
+
+ /**
+ * Returns the BOM sequence of the given CSV
+ *
+ * Must be provided by the class using this trait. An empty value is
+ * treated by isBomStrippable() as "no BOM to strip".
+ *
+ * @return string
+ */
+ abstract public function getInputBom();
+
+ /**
* Set LimitIterator Offset
*
* @param $offset
diff --git a/src/Reader.php b/src/Reader.php
index 1f73476..4e764b5 100644
--- a/src/Reader.php
+++ b/src/Reader.php
@@ -45,7 +45,9 @@ class Reader extends AbstractCsv
*/
public function query(callable $callable = null)
{
- $iterator = new CallbackFilterIterator($this->getIterator(), function ($row) {
+ $iterator = $this->getIterator();
+ $iterator = $this->applyBomStripping($iterator);
+ $iterator = new CallbackFilterIterator($iterator, function ($row) {
return is_array($row);
});
@@ -53,7 +55,7 @@ class Reader extends AbstractCsv
$iterator = $this->applyIteratorSortBy($iterator);
$iterator = $this->applyIteratorInterval($iterator);
if (! is_null($callable)) {
- $iterator = new Modifier\MapIterator($iterator, $callable);
+ return new Modifier\MapIterator($iterator, $callable);
}
return $iterator;
@@ -65,11 +67,11 @@ class Reader extends AbstractCsv
protected function getConversionIterator()
{
$iterator = $this->getIterator();
+ $iterator = $this->applyBomStripping($iterator);
$iterator = $this->applyIteratorFilter($iterator);
$iterator = $this->applyIteratorSortBy($iterator);
- $iterator = $this->applyIteratorInterval($iterator);
- return $iterator;
+ return $this->applyIteratorInterval($iterator);
}
/**
@@ -239,6 +241,11 @@ class Reader extends AbstractCsv
throw new InvalidArgumentException('the specified row does not exist');
}
+ if (0 == $offset && $this->isBomStrippable()) {
+ $bom = $this->getInputBom();
+ $res[0] = mb_substr($res[0], mb_strlen($bom));
+ }
+
return $res;
}
diff --git a/test/ReaderTest.php b/test/ReaderTest.php
index 6d0ed4a..e9efb83 100644
--- a/test/ReaderTest.php
+++ b/test/ReaderTest.php
@@ -193,7 +193,62 @@ class ReaderTest extends PHPUnit_Framework_TestCase
}
/**
- * @expectedException InvalidArgumentException
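+ * @param array $expected rows written to the temporary CSV document
+ * @param string $res expected first cell of the first row once the BOM is stripped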
+ * @dataProvider validBOMSequences
+ */
+    public function testStripBOM($expected, $res)
+    {
+        // Build an in-memory CSV document from the provided rows.
+        $document = new SplTempFileObject();
+        foreach ($expected as $record) {
+            $document->fputcsv($record);
+        }
+
+        $reader = Reader::createFromFileObject($document);
+        $reader->setFlags(SplFileObject::READ_AHEAD | SplFileObject::SKIP_EMPTY);
+        $reader->stripBom(true);
+
+        // The BOM, when present, is prepended to the first cell of the first row.
+        $rows = $reader->fetchAll();
+        $firstCell = $rows[0][0];
+
+        $this->assertSame($res, $firstCell);
+    }
+
+    /**
+     * Data sets for testStripBOM: the rows to write to the CSV document and
+     * the expected first cell of the first row
+     *
+     * @return array
+     */
+    public function validBOMSequences()
+    {
+        $bom = Reader::BOM_UTF16_LE;
+        $jane = ['jane', 'doe', 'jane.doe@example.com'];
+
+        return [
+            'withBOM' => [
+                [[$bom.'john', 'doe', 'john.doe@example.com'], $jane],
+                'john',
+            ],
+            'withDoubleBOM' => [
+                [[$bom.$bom.'john', 'doe', 'john.doe@example.com'], $jane],
+                $bom.'john',
+            ],
+            'withoutBOM' => [
+                [['john', 'doe', 'john.doe@example.com'], $jane],
+                'john',
+            ],
+        ];
+    }
+
+    /**
+     * The BOM sequence must also be stripped from the keys of the rows
+     * returned by fetchAssoc
+     */
+    public function testStripBOMWithFetchAssoc()
+    {
+        $expected = [
+            [Reader::BOM_UTF16_LE.'john', 'doe', 'john.doe@example.com'],
+            ['jane', 'doe', 'jane.doe@example.com'],
+        ];
+
+        // Single in-memory document (the original created it twice and
+        // discarded the first instance).
+        $tmpFile = new SplTempFileObject();
+        foreach ($expected as $row) {
+            $tmpFile->fputcsv($row);
+        }
+        $csv = Reader::createFromFileObject($tmpFile);
+        $csv->setFlags(SplFileObject::READ_AHEAD|SplFileObject::SKIP_EMPTY);
+        $csv->stripBom(true);
+        $res = array_keys($csv->fetchAssoc()[0]);
+
+        $this->assertSame('john', $res[0]);
+    }
+
+ /**
+ * @expectedException \InvalidArgumentException
* @expectedExceptionMessage Use a flat non empty array with unique string values
*/
public function testFetchAssocKeyFailure()