1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52 package csv
53
54 import (
55 "bufio"
56 "bytes"
57 "errors"
58 "fmt"
59 "io"
60 "unicode"
61 "unicode/utf8"
62 )
63
64
65
// A ParseError describes a failure to parse a record, together with the
// location in the input where it happened.
type ParseError struct {
	// StartLine is the line on which the offending record begins.
	StartLine int
	// Line is the line on which the error was detected. It differs from
	// StartLine when a quoted field spans several lines.
	Line int
	// Column is the 1-based column of the error, counted in bytes.
	Column int
	// Err is the underlying cause, e.g. ErrBareQuote, ErrQuote or ErrFieldCount.
	Err error
}
72
73 func (e *ParseError) Error() string {
74 if e.Err == ErrFieldCount {
75 return fmt.Sprintf("record on line %d: %v", e.Line, e.Err)
76 }
77 if e.StartLine != e.Line {
78 return fmt.Sprintf("record on line %d; parse error on line %d, column %d: %v", e.StartLine, e.Line, e.Column, e.Err)
79 }
80 return fmt.Sprintf("parse error on line %d, column %d: %v", e.Line, e.Column, e.Err)
81 }
82
83 func (e *ParseError) Unwrap() error { return e.Err }
84
85
// Sentinel errors that may appear in ParseError.Err.
var (
	// ErrBareQuote reports a '"' inside a field that did not start with a quote.
	ErrBareQuote = errors.New("bare \" in non-quoted-field")
	// ErrQuote reports a stray or unterminated '"' inside a quoted field.
	ErrQuote = errors.New("extraneous or missing \" in quoted-field")
	// ErrFieldCount reports a record whose number of fields differs from
	// Reader.FieldsPerRecord.
	ErrFieldCount = errors.New("wrong number of fields")

	// ErrTrailingComma is not produced by any code in this section of the file.
	// NOTE(review): presumably retained for compatibility; confirm before removing.
	ErrTrailingComma = errors.New("extra delimiter at end of line")
)

// errInvalidDelim is returned by readRecord when Comma or Comment fails
// validation, or when the two are equal.
var errInvalidDelim = errors.New("csv: invalid field or comment delimiter")
94
// validDelim reports whether r may be used as a field or comment delimiter.
// NUL, the double quote, carriage return, line feed, the Unicode replacement
// character and anything that is not a valid rune are rejected.
func validDelim(r rune) bool {
	switch r {
	case 0, '"', '\r', '\n', utf8.RuneError:
		return false
	}
	return utf8.ValidRune(r)
}
98
99
100
101
102
103
104
105
106
107
108 type Reader struct {
109
110
111
112
113 Comma rune
114
115
116
117
118
119
120
121
122 Comment rune
123
124
125
126
127
128
129
130 FieldsPerRecord int
131
132
133
134 LazyQuotes bool
135
136
137
138 TrimLeadingSpace bool
139
140
141
142
143 ReuseRecord bool
144
145 TrailingComma bool
146
147 r *bufio.Reader
148
149
150 numLine int
151
152
153 offset int64
154
155
156 rawBuffer []byte
157
158
159
160
161
162 recordBuffer []byte
163
164
165
166 fieldIndexes []int
167
168
169
170 fieldPositions []position
171
172
173 lastRecord []string
174 }
175
176
177 func NewReader(r io.Reader) *Reader {
178 return &Reader{
179 Comma: ',',
180 r: bufio.NewReader(r),
181 }
182 }
183
184
185
186
187
188
189
190
191
192 func (r *Reader) Read() (record []string, err error) {
193 if r.ReuseRecord {
194 record, err = r.readRecord(r.lastRecord)
195 r.lastRecord = record
196 } else {
197 record, err = r.readRecord(nil)
198 }
199 return record, err
200 }
201
202
203
204
205
206
207
208 func (r *Reader) FieldPos(field int) (line, column int) {
209 if field < 0 || field >= len(r.fieldPositions) {
210 panic("out of range index passed to FieldPos")
211 }
212 p := &r.fieldPositions[field]
213 return p.line, p.col
214 }
215
216
217
218
219 func (r *Reader) InputOffset() int64 {
220 return r.offset
221 }
222
223
// position is a location in the input: a line number and a column within
// that line.
type position struct {
	line int
	col  int
}
227
228
229
230
231
232
233 func (r *Reader) ReadAll() (records [][]string, err error) {
234 for {
235 record, err := r.readRecord(nil)
236 if err == io.EOF {
237 return records, nil
238 }
239 if err != nil {
240 return nil, err
241 }
242 records = append(records, record)
243 }
244 }
245
246
247
248
249
250 func (r *Reader) readLine() ([]byte, error) {
251 line, err := r.r.ReadSlice('\n')
252 if err == bufio.ErrBufferFull {
253 r.rawBuffer = append(r.rawBuffer[:0], line...)
254 for err == bufio.ErrBufferFull {
255 line, err = r.r.ReadSlice('\n')
256 r.rawBuffer = append(r.rawBuffer, line...)
257 }
258 line = r.rawBuffer
259 }
260 readSize := len(line)
261 if readSize > 0 && err == io.EOF {
262 err = nil
263
264 if line[readSize-1] == '\r' {
265 line = line[:readSize-1]
266 }
267 }
268 r.numLine++
269 r.offset += int64(readSize)
270
271 if n := len(line); n >= 2 && line[n-2] == '\r' && line[n-1] == '\n' {
272 line[n-2] = '\n'
273 line = line[:n-1]
274 }
275 return line, err
276 }
277
278
// lengthNL returns 1 if b ends in a newline byte and 0 otherwise, i.e. the
// length of the trailing line terminator.
func lengthNL(b []byte) int {
	if n := len(b); n == 0 || b[n-1] != '\n' {
		return 0
	}
	return 1
}
285
286
// nextRune decodes and returns the first rune of b. For empty or invalid
// input the result is utf8.RuneError, as produced by utf8.DecodeRune.
func nextRune(b []byte) rune {
	first, _ := utf8.DecodeRune(b)
	return first
}
291
292 func (r *Reader) readRecord(dst []string) ([]string, error) {
293 if r.Comma == r.Comment || !validDelim(r.Comma) || (r.Comment != 0 && !validDelim(r.Comment)) {
294 return nil, errInvalidDelim
295 }
296
297
298 var line []byte
299 var errRead error
300 for errRead == nil {
301 line, errRead = r.readLine()
302 if r.Comment != 0 && nextRune(line) == r.Comment {
303 line = nil
304 continue
305 }
306 if errRead == nil && len(line) == lengthNL(line) {
307 line = nil
308 continue
309 }
310 break
311 }
312 if errRead == io.EOF {
313 return nil, errRead
314 }
315
316
317 var err error
318 const quoteLen = len(`"`)
319 commaLen := utf8.RuneLen(r.Comma)
320 recLine := r.numLine
321 r.recordBuffer = r.recordBuffer[:0]
322 r.fieldIndexes = r.fieldIndexes[:0]
323 r.fieldPositions = r.fieldPositions[:0]
324 pos := position{line: r.numLine, col: 1}
325 parseField:
326 for {
327 if r.TrimLeadingSpace {
328 i := bytes.IndexFunc(line, func(r rune) bool {
329 return !unicode.IsSpace(r)
330 })
331 if i < 0 {
332 i = len(line)
333 pos.col -= lengthNL(line)
334 }
335 line = line[i:]
336 pos.col += i
337 }
338 if len(line) == 0 || line[0] != '"' {
339
340 i := bytes.IndexRune(line, r.Comma)
341 field := line
342 if i >= 0 {
343 field = field[:i]
344 } else {
345 field = field[:len(field)-lengthNL(field)]
346 }
347
348 if !r.LazyQuotes {
349 if j := bytes.IndexByte(field, '"'); j >= 0 {
350 col := pos.col + j
351 err = &ParseError{StartLine: recLine, Line: r.numLine, Column: col, Err: ErrBareQuote}
352 break parseField
353 }
354 }
355 r.recordBuffer = append(r.recordBuffer, field...)
356 r.fieldIndexes = append(r.fieldIndexes, len(r.recordBuffer))
357 r.fieldPositions = append(r.fieldPositions, pos)
358 if i >= 0 {
359 line = line[i+commaLen:]
360 pos.col += i + commaLen
361 continue parseField
362 }
363 break parseField
364 } else {
365
366 fieldPos := pos
367 line = line[quoteLen:]
368 pos.col += quoteLen
369 for {
370 i := bytes.IndexByte(line, '"')
371 if i >= 0 {
372
373 r.recordBuffer = append(r.recordBuffer, line[:i]...)
374 line = line[i+quoteLen:]
375 pos.col += i + quoteLen
376 switch rn := nextRune(line); {
377 case rn == '"':
378
379 r.recordBuffer = append(r.recordBuffer, '"')
380 line = line[quoteLen:]
381 pos.col += quoteLen
382 case rn == r.Comma:
383
384 line = line[commaLen:]
385 pos.col += commaLen
386 r.fieldIndexes = append(r.fieldIndexes, len(r.recordBuffer))
387 r.fieldPositions = append(r.fieldPositions, fieldPos)
388 continue parseField
389 case lengthNL(line) == len(line):
390
391 r.fieldIndexes = append(r.fieldIndexes, len(r.recordBuffer))
392 r.fieldPositions = append(r.fieldPositions, fieldPos)
393 break parseField
394 case r.LazyQuotes:
395
396 r.recordBuffer = append(r.recordBuffer, '"')
397 default:
398
399 err = &ParseError{StartLine: recLine, Line: r.numLine, Column: pos.col - quoteLen, Err: ErrQuote}
400 break parseField
401 }
402 } else if len(line) > 0 {
403
404 r.recordBuffer = append(r.recordBuffer, line...)
405 if errRead != nil {
406 break parseField
407 }
408 pos.col += len(line)
409 line, errRead = r.readLine()
410 if len(line) > 0 {
411 pos.line++
412 pos.col = 1
413 }
414 if errRead == io.EOF {
415 errRead = nil
416 }
417 } else {
418
419 if !r.LazyQuotes && errRead == nil {
420 err = &ParseError{StartLine: recLine, Line: pos.line, Column: pos.col, Err: ErrQuote}
421 break parseField
422 }
423 r.fieldIndexes = append(r.fieldIndexes, len(r.recordBuffer))
424 r.fieldPositions = append(r.fieldPositions, fieldPos)
425 break parseField
426 }
427 }
428 }
429 }
430 if err == nil {
431 err = errRead
432 }
433
434
435
436 str := string(r.recordBuffer)
437 dst = dst[:0]
438 if cap(dst) < len(r.fieldIndexes) {
439 dst = make([]string, len(r.fieldIndexes))
440 }
441 dst = dst[:len(r.fieldIndexes)]
442 var preIdx int
443 for i, idx := range r.fieldIndexes {
444 dst[i] = str[preIdx:idx]
445 preIdx = idx
446 }
447
448
449 if r.FieldsPerRecord > 0 {
450 if len(dst) != r.FieldsPerRecord && err == nil {
451 err = &ParseError{
452 StartLine: recLine,
453 Line: recLine,
454 Column: 1,
455 Err: ErrFieldCount,
456 }
457 }
458 } else if r.FieldsPerRecord == 0 {
459 r.FieldsPerRecord = len(dst)
460 }
461 return dst, err
462 }
463
View as plain text