Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
100.00% covered (success)
100.00%
60 / 60
100.00% covered (success)
100.00%
12 / 12
CRAP
100.00% covered (success)
100.00%
1 / 1
CsvParser
100.00% covered (success)
100.00%
60 / 60
100.00% covered (success)
100.00%
12 / 12
32
100.00% covered (success)
100.00%
1 / 1
 setColumnHeadings
100.00% covered (success)
100.00%
11 / 11
100.00% covered (success)
100.00%
1 / 1
6
 getColumnHeadings
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 isSingleVisibleCharacter
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
2
 setFieldDelimiterChar
100.00% covered (success)
100.00%
3 / 3
100.00% covered (success)
100.00%
1 / 1
2
 getFieldDelimiterChar
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 setFieldEnclosureChar
100.00% covered (success)
100.00%
3 / 3
100.00% covered (success)
100.00%
1 / 1
2
 getFieldEnclosureChar
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 setEscapeChar
100.00% covered (success)
100.00%
3 / 3
100.00% covered (success)
100.00%
1 / 1
2
 getEscapeChar
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 setFirstRowContainsColumnHeadings
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 getFirstRowContainsColumnHeadings
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 parseValue
100.00% covered (success)
100.00%
33 / 33
100.00% covered (success)
100.00%
1 / 1
11
 setMsgContent
n/a
0 / 0
n/a
0 / 0
1
1<?php
2/**
3 * @author: Doug Wilbourne (dougwilbourne@gmail.com)
4 */
5
6declare(strict_types=1);
7
8namespace pvc\parser\csv;
9
10use pvc\interfaces\msg\MsgInterface;
11use pvc\parser\err\CsvParserException;
12use pvc\parser\err\DuplicateColumnHeadingException;
13use pvc\parser\err\OpenFileException;
14use pvc\parser\err\InvalidColumnHeadingException;
15use pvc\parser\err\InvalidEscapeCharacterException;
16use pvc\parser\err\InvalidFieldDelimiterException;
17use pvc\parser\err\InvalidFieldEnclosureCharException;
18use pvc\parser\err\NonExistentColumnHeadingException;
19use pvc\parser\err\NonExistentFilePathException;
20use pvc\parser\Parser;
21use Throwable;
22
/**
 * Class CsvParser.  Parses a CSV file, identified by its path, into an array of rows.
 *
 * This class restricts record termination characters to be either LF or CRLF (windows).  PHP is supposed to
 * automatically detect line endings with its verbs that extract lines from a file.  This class uses fgetcsv
 * and detects the presence of a UTF-8 byte order mark at the beginning of the file.
 *
 * @extends Parser<array>
 */
class CsvParser extends Parser
{
    /**
     * path of the file most recently opened by parseValue
     *
     * @var string
     */
    protected string $filePath;

    /**
     * validated column headings, in column order
     *
     * @var array<string>
     */
    protected array $columnHeadings;

    /**
     * @var non-empty-string
     */
    protected string $fieldDelimiterChar = ',';

    /**
     * @var string
     */
    protected string $fieldEnclosureChar = "\"";

    /**
     * @var string
     */
    protected string $escapeChar = "\\";

    /**
     * when true, the first non-empty row of the file supplies the column headings
     *
     * @var bool
     */
    protected bool $firstRowContainsColumnHeadings = false;

    /**
     * setColumnHeadings
     *
     * Replaces the current column headings.  Each heading must be a string made up entirely of visible
     * characters (ctype_graph) and must be unique, because headings become the keys of each parsed row.
     *
     * @param array<string> $columnHeadings
     * @throws NonExistentColumnHeadingException if the array is empty
     * @throws InvalidColumnHeadingException if a heading is not a string of visible characters
     * @throws DuplicateColumnHeadingException if a heading appears more than once
     */
    public function setColumnHeadings(array $columnHeadings) : void
    {
        if ($columnHeadings === []) {
            throw new NonExistentColumnHeadingException();
        }

        /**
         * re-initialize the attribute so that successive calls to the parser work properly
         */
        $this->columnHeadings = [];

        foreach ($columnHeadings as $columnHeading) {
            /**
             * must be a string whose characters are all visible (this also rejects the empty string)
             */
            if (!is_string($columnHeading) || !ctype_graph($columnHeading)) {
                throw new InvalidColumnHeadingException();
            }

            /**
             * no duplicate column headings since they become indices into an array.  The comparison is strict
             * so that distinct numeric-looking headings such as "1" and "01" are not mistaken for duplicates.
             */
            if (in_array($columnHeading, $this->columnHeadings, true)) {
                throw new DuplicateColumnHeadingException($columnHeading);
            }

            $this->columnHeadings[] = $columnHeading;
        }
    }

    /**
     * getColumnHeadings
     * @return array<string>
     */
    public function getColumnHeadings() : array
    {
        return $this->columnHeadings;
    }

    /**
     * isSingleVisibleCharacter
     *
     * True when $char is exactly one byte long and that byte is a visible character according to ctype_graph.
     *
     * @param string $char
     * @return bool
     */
    protected function isSingleVisibleCharacter(string $char): bool
    {
        return strlen($char) === 1 && ctype_graph($char);
    }

    /**
     * setFieldDelimiterChar
     * @param non-empty-string $delimiterChar
     * @throws InvalidFieldDelimiterException
     */
    public function setFieldDelimiterChar(string $delimiterChar) : void
    {
        /** field delimiter must be a single visible character */
        if (!$this->isSingleVisibleCharacter($delimiterChar)) {
            throw new InvalidFieldDelimiterException();
        }
        $this->fieldDelimiterChar = $delimiterChar;
    }

    /**
     * getFieldDelimiterChar
     * @return string
     */
    public function getFieldDelimiterChar() : string
    {
        return $this->fieldDelimiterChar;
    }

    /**
     * setFieldEnclosureChar
     * @param string $enclosureChar
     * @throws InvalidFieldEnclosureCharException
     */
    public function setFieldEnclosureChar(string $enclosureChar) : void
    {
        /** field enclosure must be a single visible character */
        if (!$this->isSingleVisibleCharacter($enclosureChar)) {
            throw new InvalidFieldEnclosureCharException();
        }
        $this->fieldEnclosureChar = $enclosureChar;
    }

    /**
     * getFieldEnclosureChar
     * @return string
     */
    public function getFieldEnclosureChar() : string
    {
        return $this->fieldEnclosureChar;
    }

    /**
     * setEscapeChar
     * @param string $escapeChar
     * @throws InvalidEscapeCharacterException
     */
    public function setEscapeChar(string $escapeChar) : void
    {
        /** escape character must be a single visible character */
        if (!$this->isSingleVisibleCharacter($escapeChar)) {
            throw new InvalidEscapeCharacterException();
        }
        $this->escapeChar = $escapeChar;
    }

    /**
     * getEscapeChar
     * @return string
     */
    public function getEscapeChar() : string
    {
        return $this->escapeChar;
    }

    /**
     * setFirstRowContainsColumnHeadings
     * @param bool $value
     */
    public function setFirstRowContainsColumnHeadings(bool $value) : void
    {
        $this->firstRowContainsColumnHeadings = $value;
    }

    /**
     * getFirstRowContainsColumnHeadings
     * @return bool
     */
    public function getFirstRowContainsColumnHeadings() : bool
    {
        return $this->firstRowContainsColumnHeadings;
    }

    /**
     * parseValue
     *
     * Opens the file at $data, skips a leading UTF-8 byte order mark if there is one, reads every non-empty
     * row with fgetcsv and stores the result in parsedValue.  When the first row contains column headings,
     * that row is validated via setColumnHeadings and every following row is keyed by heading.
     *
     * @param string $data path of the csv file to parse
     * @return bool
     * @throws OpenFileException if the file cannot be opened
     * @throws CsvParserException if reading stops before the end of the file
     * @throws NonExistentColumnHeadingException
     * @throws InvalidColumnHeadingException
     * @throws DuplicateColumnHeadingException
     */
    protected function parseValue(string $data): bool
    {
        try {
            $handle = fopen($data, 'r');
        } catch (Throwable $e) {
            throw new OpenFileException($data, $e);
        }

        /**
         * fopen signals most failures by returning false (and emitting a warning) rather than by throwing, so
         * the result must be checked explicitly before it is handed to the stream functions below.
         */
        if ($handle === false) {
            throw new OpenFileException(
                $data,
                new \RuntimeException(error_get_last()['message'] ?? 'fopen failed')
            );
        }
        $this->filePath = $data;

        try {
            /**
             * fgets with a length of 4 reads at most three bytes.  If they are not the byte order mark, rewind
             * the pointer to the start of the file.
             */
            $bom = "\xef\xbb\xbf";
            if (fgets($handle, 4) !== $bom) {
                rewind($handle);
            }

            $rows = [];
            while (false !== ($line = fgetcsv(
                $handle,
                null,
                $this->getFieldDelimiterChar(),
                $this->getFieldEnclosureChar(),
                $this->getEscapeChar()
            ))) {
                /**
                 * fgetcsv returns an array with a single element consisting of a null value if the line is empty
                 */
                if ($line[0] !== null) {
                    $rows[] = $line;
                }
            }

            /**
             * if we are not at the end of the file then fgetcsv returned false because it could not parse a line.
             */
            if (!feof($handle)) {
                throw new CsvParserException($data);
            }
        } finally {
            /**
             * release the handle whether reading succeeded or threw
             */
            fclose($handle);
        }

        if (!$this->getFirstRowContainsColumnHeadings()) {
            $this->parsedValue = $rows;
            return true;
        }

        /**
         * array_combine would automatically convert invalid array keys to strings, but it will not check
         * for duplicate column names or verify that all the characters are graphic (e.g. visible), so the
         * setColumnHeadings method ensures those things.  Also, array_combine fails if the number of headings does
         * not match the number of elements in each and every row of data, so each row is keyed manually here.
         */
        $firstRow = array_shift($rows);
        if ($firstRow !== null) {
            $this->setColumnHeadings($firstRow);
        }

        /**
         * The keyed rows are collected locally and assigned once, so the result never mixes with rows left over
         * from an earlier parse and is an empty array when the file holds headings but no data rows.
         */
        $keyedRows = [];
        foreach ($rows as $row) {
            $keyedRow = [];
            foreach ($row as $index => $element) {
                /**
                 * elements beyond the last heading keep their numeric position as the key
                 */
                $keyedRow[$this->columnHeadings[$index] ?? $index] = $element;
            }
            $keyedRows[] = $keyedRow;
        }
        $this->parsedValue = $keyedRows;

        return true;
    }

    /**
     * setMsgContent
     *
     * No-op: this implementation leaves the supplied message untouched.
     *
     * @param MsgInterface $msg
     */
    public function setMsgContent(MsgInterface $msg): void {}
}