1
2
3
4
5 package tar
6
7 import (
8 "bytes"
9 "io"
10 "strconv"
11 "strings"
12 "time"
13 )
14
15
16
17
// Reader reads the entries of a tar archive sequentially from an underlying
// io.Reader. Next advances to the next entry; Read returns the data of the
// current entry.
type Reader struct {
	r    io.Reader  // underlying archive stream
	pad  int64      // padding bytes still to be skipped after the current entry's data
	curr fileReader // reader for the current entry's data
	blk  block      // scratch block reused for header and padding reads

	// err is a sticky error. Next, Read and writeTo store failures here and
	// return the stored value immediately on later calls.
	err error
}
29
// fileReader is the interface satisfied by the readers that serve an entry's
// data (regFileReader and sparseFileReader in this file).
type fileReader interface {
	io.Reader
	// fileState is declared outside this section.
	// NOTE(review): presumably it supplies logicalRemaining and
	// physicalRemaining, which callers in this file use — confirm.
	fileState

	WriteTo(io.Writer) (int64, error)
}
36
37
38 func NewReader(r io.Reader) *Reader {
39 return &Reader{r: r, curr: ®FileReader{r, 0}}
40 }
41
42
43
44
45
46
47 func (tr *Reader) Next() (*Header, error) {
48 if tr.err != nil {
49 return nil, tr.err
50 }
51 hdr, err := tr.next()
52 tr.err = err
53 return hdr, err
54 }
55
// next reads headers until it reaches one that describes an entry to hand
// back to the caller. PAX extended headers and GNU long-name/long-link
// headers seen on the way are remembered and merged into that entry's header.
// A PAX global header is returned to the caller as an entry of its own.
func (tr *Reader) next() (*Header, error) {
	var paxHdrs map[string]string
	var gnuLongName, gnuLongLink string

	// format begins as the union of all candidate formats and is narrowed by
	// every header processed for this entry, including the meta headers.
	format := FormatUSTAR | FormatPAX | FormatGNU
	for {
		// Skip whatever remains of the previous entry: unread data first,
		// then the padding that follows it.
		if err := discard(tr.r, tr.curr.physicalRemaining()); err != nil {
			return nil, err
		}
		if _, err := tryReadFull(tr.r, tr.blk[:tr.pad]); err != nil {
			return nil, err
		}
		tr.pad = 0

		hdr, rawHdr, err := tr.readHeader()
		if err != nil {
			return nil, err
		}
		// Set up tr.curr and tr.pad so the body of a meta header can be read
		// through tr below.
		if err := tr.handleRegularFile(hdr); err != nil {
			return nil, err
		}
		format.mayOnlyBe(hdr.Format)

		// Meta headers modify the entry that follows instead of being
		// returned themselves (except for the PAX global header).
		switch hdr.Typeflag {
		case TypeXHeader, TypeXGlobalHeader:
			format.mayOnlyBe(FormatPAX)
			paxHdrs, err = parsePAX(tr)
			if err != nil {
				return nil, err
			}
			if hdr.Typeflag == TypeXGlobalHeader {
				// NOTE(review): the error returned by mergePAX is not
				// checked on this path — confirm that is intentional.
				mergePAX(hdr, paxHdrs)
				// Only a subset of the parsed fields is exposed for a
				// global header.
				return &Header{
					Name:       hdr.Name,
					Typeflag:   hdr.Typeflag,
					Xattrs:     hdr.Xattrs,
					PAXRecords: hdr.PAXRecords,
					Format:     format,
				}, nil
			}
			continue // the records apply to the next header read
		case TypeGNULongName, TypeGNULongLink:
			format.mayOnlyBe(FormatGNU)
			realname, err := readSpecialFile(tr)
			if err != nil {
				return nil, err
			}

			var p parser
			switch hdr.Typeflag {
			case TypeGNULongName:
				gnuLongName = p.parseString(realname)
			case TypeGNULongLink:
				gnuLongLink = p.parseString(realname)
			}
			continue // the long name/link applies to the next header read
		default:
			// A real entry: apply the overrides collected from the meta
			// headers. The GNU long name/link is applied after the PAX
			// records, so it wins if both are present.
			if err := mergePAX(hdr, paxHdrs); err != nil {
				return nil, err
			}
			if gnuLongName != "" {
				hdr.Name = gnuLongName
			}
			if gnuLongLink != "" {
				hdr.Linkname = gnuLongLink
			}
			// Resolve TypeRegA to a directory or a regular file depending on
			// whether the final name ends in a slash.
			if hdr.Typeflag == TypeRegA {
				if strings.HasSuffix(hdr.Name, "/") {
					hdr.Typeflag = TypeDir
				} else {
					hdr.Typeflag = TypeReg
				}
			}

			// hdr.Size and hdr.Typeflag may have changed above, so tr.curr
			// and tr.pad are recomputed from the final values.
			if err := tr.handleRegularFile(hdr); err != nil {
				return nil, err
			}

			// Sparse handling may replace tr.curr with a sparseFileReader
			// wrapping the reader that was just installed.
			if err := tr.handleSparseFile(hdr, rawHdr); err != nil {
				return nil, err
			}

			// When both USTAR and PAX are still possible, report USTAR.
			if format.has(FormatUSTAR) && format.has(FormatPAX) {
				format.mayOnlyBe(FormatUSTAR)
			}
			hdr.Format = format
			return hdr, nil
		}
	}
}
161
162
163
164
165 func (tr *Reader) handleRegularFile(hdr *Header) error {
166 nb := hdr.Size
167 if isHeaderOnlyType(hdr.Typeflag) {
168 nb = 0
169 }
170 if nb < 0 {
171 return ErrHeader
172 }
173
174 tr.pad = blockPadding(nb)
175 tr.curr = ®FileReader{r: tr.r, nb: nb}
176 return nil
177 }
178
179
180
181 func (tr *Reader) handleSparseFile(hdr *Header, rawHdr *block) error {
182 var spd sparseDatas
183 var err error
184 if hdr.Typeflag == TypeGNUSparse {
185 spd, err = tr.readOldGNUSparseMap(hdr, rawHdr)
186 } else {
187 spd, err = tr.readGNUSparsePAXHeaders(hdr)
188 }
189
190
191
192 if err == nil && spd != nil {
193 if isHeaderOnlyType(hdr.Typeflag) || !validateSparseEntries(spd, hdr.Size) {
194 return ErrHeader
195 }
196 sph := invertSparseEntries(spd, hdr.Size)
197 tr.curr = &sparseFileReader{tr.curr, sph, 0}
198 }
199 return err
200 }
201
202
203
204
205
206 func (tr *Reader) readGNUSparsePAXHeaders(hdr *Header) (sparseDatas, error) {
207
208 var is1x0 bool
209 major, minor := hdr.PAXRecords[paxGNUSparseMajor], hdr.PAXRecords[paxGNUSparseMinor]
210 switch {
211 case major == "0" && (minor == "0" || minor == "1"):
212 is1x0 = false
213 case major == "1" && minor == "0":
214 is1x0 = true
215 case major != "" || minor != "":
216 return nil, nil
217 case hdr.PAXRecords[paxGNUSparseMap] != "":
218 is1x0 = false
219 default:
220 return nil, nil
221 }
222 hdr.Format.mayOnlyBe(FormatPAX)
223
224
225 if name := hdr.PAXRecords[paxGNUSparseName]; name != "" {
226 hdr.Name = name
227 }
228 size := hdr.PAXRecords[paxGNUSparseSize]
229 if size == "" {
230 size = hdr.PAXRecords[paxGNUSparseRealSize]
231 }
232 if size != "" {
233 n, err := strconv.ParseInt(size, 10, 64)
234 if err != nil {
235 return nil, ErrHeader
236 }
237 hdr.Size = n
238 }
239
240
241 if is1x0 {
242 return readGNUSparseMap1x0(tr.curr)
243 }
244 return readGNUSparseMap0x1(hdr.PAXRecords)
245 }
246
247
248 func mergePAX(hdr *Header, paxHdrs map[string]string) (err error) {
249 for k, v := range paxHdrs {
250 if v == "" {
251 continue
252 }
253 var id64 int64
254 switch k {
255 case paxPath:
256 hdr.Name = v
257 case paxLinkpath:
258 hdr.Linkname = v
259 case paxUname:
260 hdr.Uname = v
261 case paxGname:
262 hdr.Gname = v
263 case paxUid:
264 id64, err = strconv.ParseInt(v, 10, 64)
265 hdr.Uid = int(id64)
266 case paxGid:
267 id64, err = strconv.ParseInt(v, 10, 64)
268 hdr.Gid = int(id64)
269 case paxAtime:
270 hdr.AccessTime, err = parsePAXTime(v)
271 case paxMtime:
272 hdr.ModTime, err = parsePAXTime(v)
273 case paxCtime:
274 hdr.ChangeTime, err = parsePAXTime(v)
275 case paxSize:
276 hdr.Size, err = strconv.ParseInt(v, 10, 64)
277 default:
278 if strings.HasPrefix(k, paxSchilyXattr) {
279 if hdr.Xattrs == nil {
280 hdr.Xattrs = make(map[string]string)
281 }
282 hdr.Xattrs[k[len(paxSchilyXattr):]] = v
283 }
284 }
285 if err != nil {
286 return ErrHeader
287 }
288 }
289 hdr.PAXRecords = paxHdrs
290 return nil
291 }
292
293
294
295 func parsePAX(r io.Reader) (map[string]string, error) {
296 buf, err := readSpecialFile(r)
297 if err != nil {
298 return nil, err
299 }
300 sbuf := string(buf)
301
302
303
304
305 var sparseMap []string
306
307 paxHdrs := make(map[string]string)
308 for len(sbuf) > 0 {
309 key, value, residual, err := parsePAXRecord(sbuf)
310 if err != nil {
311 return nil, ErrHeader
312 }
313 sbuf = residual
314
315 switch key {
316 case paxGNUSparseOffset, paxGNUSparseNumBytes:
317
318 if (len(sparseMap)%2 == 0 && key != paxGNUSparseOffset) ||
319 (len(sparseMap)%2 == 1 && key != paxGNUSparseNumBytes) ||
320 strings.Contains(value, ",") {
321 return nil, ErrHeader
322 }
323 sparseMap = append(sparseMap, value)
324 default:
325 paxHdrs[key] = value
326 }
327 }
328 if len(sparseMap) > 0 {
329 paxHdrs[paxGNUSparseMap] = strings.Join(sparseMap, ",")
330 }
331 return paxHdrs, nil
332 }
333
334
335
336
337
338
339
340
341
// readHeader reads the next header block into tr.blk and decodes it.
//
// It returns the decoded header, a pointer to the raw block (tr.blk), and any
// error accumulated by the field parser. Two consecutive all-zero blocks mean
// end of archive and produce io.EOF; a zero block followed by a non-zero
// block, or a block whose format cannot be determined, produces ErrHeader.
func (tr *Reader) readHeader() (*Header, *block, error) {
	// Two blocks of zero bytes mark the end of the archive.
	if _, err := io.ReadFull(tr.r, tr.blk[:]); err != nil {
		return nil, nil, err
	}
	if bytes.Equal(tr.blk[:], zeroBlock[:]) {
		if _, err := io.ReadFull(tr.r, tr.blk[:]); err != nil {
			return nil, nil, err
		}
		if bytes.Equal(tr.blk[:], zeroBlock[:]) {
			return nil, nil, io.EOF
		}
		return nil, nil, ErrHeader // a lone zero block is not a valid header
	}

	// Determine which format the block is written in.
	format := tr.blk.getFormat()
	if format == FormatUnknown {
		return nil, nil, ErrHeader
	}

	// p collects parse errors across all the fields below; its err field is
	// what this function finally returns.
	var p parser
	hdr := new(Header)

	// Fields decoded through the V7 view of the block, for every format.
	v7 := tr.blk.toV7()
	hdr.Typeflag = v7.typeFlag()[0]
	hdr.Name = p.parseString(v7.name())
	hdr.Linkname = p.parseString(v7.linkName())
	hdr.Size = p.parseNumeric(v7.size())
	hdr.Mode = p.parseNumeric(v7.mode())
	hdr.Uid = int(p.parseNumeric(v7.uid()))
	hdr.Gid = int(p.parseNumeric(v7.gid()))
	hdr.ModTime = time.Unix(p.parseNumeric(v7.modTime()), 0)

	// Formats beyond V7 carry additional fields, read through the USTAR view.
	if format > formatV7 {
		ustar := tr.blk.toUSTAR()
		hdr.Uname = p.parseString(ustar.userName())
		hdr.Gname = p.parseString(ustar.groupName())
		hdr.Devmajor = p.parseNumeric(ustar.devMajor())
		hdr.Devminor = p.parseNumeric(ustar.devMinor())

		var prefix string
		switch {
		case format.has(FormatUSTAR | FormatPAX):
			hdr.Format = format
			ustar := tr.blk.toUSTAR()
			prefix = p.parseString(ustar.prefix())

			// The fields above are still parsed, but the format is reported
			// as unknown if the block contains any byte >= 0x80 or if one of
			// the numeric fields does not end in a NUL byte.
			notASCII := func(r rune) bool { return r >= 0x80 }
			if bytes.IndexFunc(tr.blk[:], notASCII) >= 0 {
				hdr.Format = FormatUnknown
			}
			nul := func(b []byte) bool { return int(b[len(b)-1]) == 0 }
			if !(nul(v7.size()) && nul(v7.mode()) && nul(v7.uid()) && nul(v7.gid()) &&
				nul(v7.modTime()) && nul(ustar.devMajor()) && nul(ustar.devMinor())) {
				hdr.Format = FormatUnknown
			}
		case format.has(formatSTAR):
			star := tr.blk.toSTAR()
			prefix = p.parseString(star.prefix())
			hdr.AccessTime = time.Unix(p.parseNumeric(star.accessTime()), 0)
			hdr.ChangeTime = time.Unix(p.parseNumeric(star.changeTime()), 0)
		case format.has(FormatGNU):
			hdr.Format = format
			// The GNU timestamps use a separate parser so that a failure
			// here does not become the error returned by readHeader.
			var p2 parser
			gnu := tr.blk.toGNU()
			// A leading zero byte means the field is unset.
			if b := gnu.accessTime(); b[0] != 0 {
				hdr.AccessTime = time.Unix(p2.parseNumeric(b), 0)
			}
			if b := gnu.changeTime(); b[0] != 0 {
				hdr.ChangeTime = time.Unix(p2.parseNumeric(b), 0)
			}

			// If either timestamp failed to parse, drop both, fall back to
			// the USTAR prefix field when it is pure ASCII, and report the
			// format as unknown.
			if p2.err != nil {
				hdr.AccessTime, hdr.ChangeTime = time.Time{}, time.Time{}
				ustar := tr.blk.toUSTAR()
				if s := p.parseString(ustar.prefix()); isASCII(s) {
					prefix = s
				}
				hdr.Format = FormatUnknown
			}
		}
		// A non-empty prefix is prepended to the name with a slash.
		if len(prefix) > 0 {
			hdr.Name = prefix + "/" + hdr.Name
		}
	}
	return hdr, &tr.blk, p.err
}
455
456
457
458
459
460
461
462
463
464 func (tr *Reader) readOldGNUSparseMap(hdr *Header, blk *block) (sparseDatas, error) {
465
466
467
468 if blk.getFormat() != FormatGNU {
469 return nil, ErrHeader
470 }
471 hdr.Format.mayOnlyBe(FormatGNU)
472
473 var p parser
474 hdr.Size = p.parseNumeric(blk.toGNU().realSize())
475 if p.err != nil {
476 return nil, p.err
477 }
478 s := blk.toGNU().sparse()
479 spd := make(sparseDatas, 0, s.maxEntries())
480 for {
481 for i := 0; i < s.maxEntries(); i++ {
482
483 if s.entry(i).offset()[0] == 0x00 {
484 break
485 }
486 offset := p.parseNumeric(s.entry(i).offset())
487 length := p.parseNumeric(s.entry(i).length())
488 if p.err != nil {
489 return nil, p.err
490 }
491 spd = append(spd, sparseEntry{Offset: offset, Length: length})
492 }
493
494 if s.isExtended()[0] > 0 {
495
496 if _, err := mustReadFull(tr.r, blk[:]); err != nil {
497 return nil, err
498 }
499 s = blk.toSparse()
500 continue
501 }
502 return spd, nil
503 }
504 }
505
506
507
508
509
510
511
512
513
514
515
516 func readGNUSparseMap1x0(r io.Reader) (sparseDatas, error) {
517 var (
518 cntNewline int64
519 buf bytes.Buffer
520 blk block
521 )
522
523
524
525 feedTokens := func(n int64) error {
526 for cntNewline < n {
527 if _, err := mustReadFull(r, blk[:]); err != nil {
528 return err
529 }
530 buf.Write(blk[:])
531 for _, c := range blk {
532 if c == '\n' {
533 cntNewline++
534 }
535 }
536 }
537 return nil
538 }
539
540
541
542 nextToken := func() string {
543 cntNewline--
544 tok, _ := buf.ReadString('\n')
545 return strings.TrimRight(tok, "\n")
546 }
547
548
549
550 if err := feedTokens(1); err != nil {
551 return nil, err
552 }
553 numEntries, err := strconv.ParseInt(nextToken(), 10, 0)
554 if err != nil || numEntries < 0 || int(2*numEntries) < int(numEntries) {
555 return nil, ErrHeader
556 }
557
558
559
560
561 if err := feedTokens(2 * numEntries); err != nil {
562 return nil, err
563 }
564 spd := make(sparseDatas, 0, numEntries)
565 for i := int64(0); i < numEntries; i++ {
566 offset, err1 := strconv.ParseInt(nextToken(), 10, 64)
567 length, err2 := strconv.ParseInt(nextToken(), 10, 64)
568 if err1 != nil || err2 != nil {
569 return nil, ErrHeader
570 }
571 spd = append(spd, sparseEntry{Offset: offset, Length: length})
572 }
573 return spd, nil
574 }
575
576
577
578 func readGNUSparseMap0x1(paxHdrs map[string]string) (sparseDatas, error) {
579
580
581 numEntriesStr := paxHdrs[paxGNUSparseNumBlocks]
582 numEntries, err := strconv.ParseInt(numEntriesStr, 10, 0)
583 if err != nil || numEntries < 0 || int(2*numEntries) < int(numEntries) {
584 return nil, ErrHeader
585 }
586
587
588 sparseMap := strings.Split(paxHdrs[paxGNUSparseMap], ",")
589 if len(sparseMap) == 1 && sparseMap[0] == "" {
590 sparseMap = sparseMap[:0]
591 }
592 if int64(len(sparseMap)) != 2*numEntries {
593 return nil, ErrHeader
594 }
595
596
597
598 spd := make(sparseDatas, 0, numEntries)
599 for len(sparseMap) >= 2 {
600 offset, err1 := strconv.ParseInt(sparseMap[0], 10, 64)
601 length, err2 := strconv.ParseInt(sparseMap[1], 10, 64)
602 if err1 != nil || err2 != nil {
603 return nil, ErrHeader
604 }
605 spd = append(spd, sparseEntry{Offset: offset, Length: length})
606 sparseMap = sparseMap[2:]
607 }
608 return spd, nil
609 }
610
611
612
613
614
615
616
617
618
619
620
621 func (tr *Reader) Read(b []byte) (int, error) {
622 if tr.err != nil {
623 return 0, tr.err
624 }
625 n, err := tr.curr.Read(b)
626 if err != nil && err != io.EOF {
627 tr.err = err
628 }
629 return n, err
630 }
631
632
633
634
635
636
637
638
639
640
641
642 func (tr *Reader) writeTo(w io.Writer) (int64, error) {
643 if tr.err != nil {
644 return 0, tr.err
645 }
646 n, err := tr.curr.WriteTo(w)
647 if err != nil {
648 tr.err = err
649 }
650 return n, err
651 }
652
653
// regFileReader reads the data of a regular entry: at most nb further bytes
// from r.
type regFileReader struct {
	r  io.Reader // underlying stream
	nb int64     // bytes of this entry not yet read
}

// Read reads up to len(b) bytes, never going beyond the end of the entry.
//
// It returns io.EOF together with the final bytes once the entry is
// exhausted, and io.ErrUnexpectedEOF if the underlying stream ends while
// bytes of the entry are still outstanding.
func (fr *regFileReader) Read(b []byte) (n int, err error) {
	if remaining := fr.nb; int64(len(b)) > remaining {
		b = b[:remaining]
	}
	if len(b) != 0 {
		n, err = fr.r.Read(b)
		fr.nb -= int64(n)
	}
	if err == io.EOF && fr.nb > 0 {
		return n, io.ErrUnexpectedEOF
	}
	if err == nil && fr.nb == 0 {
		return n, io.EOF
	}
	return n, err
}

// WriteTo copies the rest of the entry to w.
func (fr *regFileReader) WriteTo(w io.Writer) (int64, error) {
	// Wrap fr so that only Read is visible to io.Copy. Otherwise io.Copy
	// would call this WriteTo method again.
	readOnly := struct{ io.Reader }{fr}
	return io.Copy(w, readOnly)
}

// logicalRemaining reports the number of bytes of the entry left to read.
func (fr regFileReader) logicalRemaining() int64 {
	remaining := fr.nb
	return remaining
}

// physicalRemaining reports the number of bytes of the entry still to be
// consumed from the underlying stream; for a regular file this is the same
// count as logicalRemaining.
func (fr regFileReader) physicalRemaining() int64 {
	remaining := fr.nb
	return remaining
}
690
691
// sparseFileReader presents a sparse entry as one continuous stream: data
// fragments come from fr and holes are filled with zero bytes.
type sparseFileReader struct {
	fr  fileReader  // source of the stored data fragments
	sp  sparseHoles // holes not yet fully passed; the last one is never removed
	pos int64       // current offset within the logical (expanded) file
}
697
// Read fills b with the next bytes of the logical file, taking stored data
// from sr.fr and producing zeros for holes.
//
// It returns io.EOF together with the last bytes when the request reaches the
// end of the logical file, errMissData if the underlying reader runs out
// before a data fragment is complete, and errUnrefData if the logical file is
// complete but the underlying reader still has data left.
func (sr *sparseFileReader) Read(b []byte) (n int, err error) {
	// finished records that this call covers everything that is left, so a
	// successful read should end with io.EOF.
	finished := int64(len(b)) >= sr.logicalRemaining()
	if finished {
		b = b[:sr.logicalRemaining()]
	}

	b0 := b // kept to compute how many bytes were produced
	endPos := sr.pos + int64(len(b))
	for endPos > sr.pos && err == nil {
		var nf int // bytes produced by this fragment
		holeStart, holeEnd := sr.sp[0].Offset, sr.sp[0].endOffset()
		if sr.pos < holeStart { // in a data fragment before the next hole
			bf := b[:min(int64(len(b)), holeStart-sr.pos)]
			nf, err = tryReadFull(sr.fr, bf)
		} else { // inside the hole
			bf := b[:min(int64(len(b)), holeEnd-sr.pos)]
			nf, err = tryReadFull(zeroReader{}, bf)
		}
		b = b[nf:]
		sr.pos += int64(nf)
		// Move on to the next hole once this one is passed, but always keep
		// the final hole so that sr.sp is never empty.
		if sr.pos >= holeEnd && len(sr.sp) > 1 {
			sr.sp = sr.sp[1:]
		}
	}

	n = len(b0) - len(b)
	switch {
	case err == io.EOF:
		return n, errMissData // underlying data ended inside a data fragment
	case err != nil:
		return n, err
	case sr.logicalRemaining() == 0 && sr.physicalRemaining() > 0:
		return n, errUnrefData // stored data not covered by the sparse map
	case finished:
		return n, io.EOF
	default:
		return n, nil
	}
}
737
// WriteTo writes the rest of the logical file to w.
//
// If w is an io.WriteSeeker whose Seek works, holes are skipped by seeking
// instead of writing zeros; otherwise the data is copied through Read. The
// returned count is the number of logical bytes advanced, including holes
// that were skipped by seeking.
func (sr *sparseFileReader) WriteTo(w io.Writer) (n int64, err error) {
	ws, ok := w.(io.WriteSeeker)
	if ok {
		// Probe with a no-op seek; some writers have a Seek method that
		// always fails.
		if _, err := ws.Seek(0, io.SeekCurrent); err != nil {
			ok = false
		}
	}
	if !ok {
		// Wrap sr so io.Copy only sees Read and does not call WriteTo again.
		return io.Copy(w, struct{ io.Reader }{sr})
	}

	var writeLastByte bool
	pos0 := sr.pos
	for sr.logicalRemaining() > 0 && !writeLastByte && err == nil {
		var nf int64 // bytes advanced by this fragment
		holeStart, holeEnd := sr.sp[0].Offset, sr.sp[0].endOffset()
		if sr.pos < holeStart { // in a data fragment: copy stored bytes
			nf = holeStart - sr.pos
			nf, err = io.CopyN(ws, sr.fr, nf)
		} else { // in a hole: seek over it
			nf = holeEnd - sr.pos
			// With no stored data left, stop one byte short of the hole's
			// end so that a real byte can be written afterwards.
			if sr.physicalRemaining() == 0 {
				writeLastByte = true
				nf--
			}
			_, err = ws.Seek(nf, io.SeekCurrent)
		}
		sr.pos += nf
		// Move on to the next hole once this one is passed, but always keep
		// the final hole so that sr.sp is never empty.
		if sr.pos >= holeEnd && len(sr.sp) > 1 {
			sr.sp = sr.sp[1:]
		}
	}

	// Write the byte that was held back, so the output ends with written
	// data rather than a seek.
	if writeLastByte && err == nil {
		_, err = ws.Write([]byte{0})
		sr.pos++
	}

	n = sr.pos - pos0
	switch {
	case err == io.EOF:
		return n, errMissData // underlying data ended inside a data fragment
	case err != nil:
		return n, err
	case sr.logicalRemaining() == 0 && sr.physicalRemaining() > 0:
		return n, errUnrefData // stored data not covered by the sparse map
	default:
		return n, nil
	}
}
790
791 func (sr sparseFileReader) logicalRemaining() int64 {
792 return sr.sp[len(sr.sp)-1].endOffset() - sr.pos
793 }
794 func (sr sparseFileReader) physicalRemaining() int64 {
795 return sr.fr.physicalRemaining()
796 }
797
// zeroReader is an io.Reader that yields an endless stream of zero bytes.
type zeroReader struct{}

// Read overwrites every byte of b with zero and reports len(b) bytes read.
// It never returns an error.
func (zeroReader) Read(b []byte) (int, error) {
	for i := 0; i < len(b); i++ {
		b[i] = 0
	}
	return len(b), nil
}
806
807
808
809 func mustReadFull(r io.Reader, b []byte) (int, error) {
810 n, err := tryReadFull(r, b)
811 if err == io.EOF {
812 err = io.ErrUnexpectedEOF
813 }
814 return n, err
815 }
816
817
818
// tryReadFull reads from r until b is full or r returns an error.
//
// An io.EOF that arrives together with the byte that fills b is suppressed.
// A short read caused by end of input is reported as plain io.EOF, even if
// some bytes were read.
func tryReadFull(r io.Reader, b []byte) (n int, err error) {
	for n < len(b) {
		var got int
		got, err = r.Read(b[n:])
		n += got
		if err != nil {
			if err == io.EOF && n == len(b) {
				return n, nil
			}
			return n, err
		}
	}
	return n, nil
}
830
831
832
833 func readSpecialFile(r io.Reader) ([]byte, error) {
834 buf, err := io.ReadAll(io.LimitReader(r, maxSpecialFileSize+1))
835 if len(buf) > maxSpecialFileSize {
836 return nil, ErrFieldTooLong
837 }
838 return buf, err
839 }
840
841
// discard skips n bytes in r, reporting io.ErrUnexpectedEOF if fewer than n
// bytes could be skipped.
//
// When r supports seeking, all but the last byte are skipped with Seek and
// the last byte is read, so that a position past the end of the input is
// still detected. If the initial position query fails, the whole amount is
// read and thrown away instead.
func discard(r io.Reader, n int64) error {
	var bySeek int64 // bytes skipped by seeking
	if seeker, canSeek := r.(io.Seeker); canSeek && n > 1 {
		// Ask for the current offset first; a failure here means seeking is
		// not usable and the copy below handles everything.
		if start, err := seeker.Seek(0, io.SeekCurrent); err == nil && start >= 0 {
			end, err := seeker.Seek(n-1, io.SeekCurrent)
			if err != nil || end < 0 {
				return err
			}
			bySeek = end - start
		}
	}

	byCopy, err := io.CopyN(io.Discard, r, n-bySeek)
	if err == io.EOF && bySeek+byCopy < n {
		return io.ErrUnexpectedEOF
	}
	return err
}
870
View as plain text