...

Source file src/golang.org/x/mod/zip/zip.go

Documentation: golang.org/x/mod/zip

     1  // Copyright 2019 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // Package zip provides functions for creating and extracting module zip files.
     6  //
     7  // Module zip files have several restrictions listed below. These are necessary
     8  // to ensure that module zip files can be extracted consistently on supported
     9  // platforms and file systems.
    10  //
    11  // • All file paths within a zip file must start with "<module>@<version>/",
    12  // where "<module>" is the module path and "<version>" is the version.
    13  // The module path must be valid (see golang.org/x/mod/module.CheckPath).
    14  // The version must be valid and canonical (see
    15  // golang.org/x/mod/module.CanonicalVersion). The path must have a major
    16  // version suffix consistent with the version (see
    17  // golang.org/x/mod/module.Check). The part of the file path after the
    18  // "<module>@<version>/" prefix must be valid (see
    19  // golang.org/x/mod/module.CheckFilePath).
    20  //
    21  // • No two file paths may be equal under Unicode case-folding (see
    22  // strings.EqualFold).
    23  //
    24  // • A go.mod file may or may not appear in the top-level directory. If present,
    25  // it must be named "go.mod", not any other case. Files named "go.mod"
    26  // are not allowed in any other directory.
    27  //
    28  // • The total size in bytes of a module zip file may be at most MaxZipFile
    29  // bytes (500 MiB). The total uncompressed size of the files within the
    30  // zip may also be at most MaxZipFile bytes.
    31  //
    32  // • Each file's uncompressed size must match its declared 64-bit uncompressed
    33  // size in the zip file header.
    34  //
    35  // • If the zip contains files named "<module>@<version>/go.mod" or
    36  // "<module>@<version>/LICENSE", their sizes in bytes may be at most
    37  // MaxGoMod or MaxLICENSE, respectively (both are 16 MiB).
    38  //
    39  // • Empty directories are ignored. File permissions and timestamps are also
    40  // ignored.
    41  //
    42  // • Symbolic links and other irregular files are not allowed.
    43  //
    44  // Note that this package does not provide hashing functionality. See
    45  // golang.org/x/mod/sumdb/dirhash.
    46  package zip
    47  
    48  import (
    49  	"archive/zip"
    50  	"bytes"
    51  	"errors"
    52  	"fmt"
    53  	"io"
    54  	"io/ioutil"
    55  	"os"
    56  	"os/exec"
    57  	"path"
    58  	"path/filepath"
    59  	"strings"
    60  	"unicode"
    61  	"unicode/utf8"
    62  
    63  	"golang.org/x/mod/module"
    64  )
    65  
const (
	// MaxZipFile is the maximum size in bytes of a module zip file
	// (500 << 20, i.e. 500 MiB). The go command will report an error if
	// either the zip file or its extracted content is larger than this.
	MaxZipFile = 500 << 20

	// MaxGoMod is the maximum size in bytes of a go.mod file within a
	// module zip file (16 << 20, i.e. 16 MiB).
	MaxGoMod = 16 << 20

	// MaxLICENSE is the maximum size in bytes of a LICENSE file within a
	// module zip file (16 << 20, i.e. 16 MiB).
	MaxLICENSE = 16 << 20
)
    80  
// File provides an abstraction for a file in a directory, zip, or anything
// else that looks like a file.
//
// CheckFiles and Create consume lists of File values: Path names the file,
// Lstat supplies the mode and size used for validation, and Open (used by
// Create only) supplies the content.
type File interface {
	// Path returns a clean slash-separated relative path from the module root
	// directory to the file.
	Path() string

	// Lstat returns information about the file. If the file is a symbolic link,
	// Lstat returns information about the link itself, not the file it points to.
	Lstat() (os.FileInfo, error)

	// Open provides access to the data within a regular file. Open may return
	// an error if called on a directory or symbolic link.
	Open() (io.ReadCloser, error)
}
    96  
    97  // CheckedFiles reports whether a set of files satisfy the name and size
    98  // constraints required by module zip files. The constraints are listed in the
    99  // package documentation.
   100  //
   101  // Functions that produce this report may include slightly different sets of
   102  // files. See documentation for CheckFiles, CheckDir, and CheckZip for details.
   103  type CheckedFiles struct {
   104  	// Valid is a list of file paths that should be included in a zip file.
   105  	Valid []string
   106  
   107  	// Omitted is a list of files that are ignored when creating a module zip
   108  	// file, along with the reason each file is ignored.
   109  	Omitted []FileError
   110  
   111  	// Invalid is a list of files that should not be included in a module zip
   112  	// file, along with the reason each file is invalid.
   113  	Invalid []FileError
   114  
   115  	// SizeError is non-nil if the total uncompressed size of the valid files
   116  	// exceeds the module zip size limit or if the zip file itself exceeds the
   117  	// limit.
   118  	SizeError error
   119  }
   120  
   121  // Err returns an error if CheckedFiles does not describe a valid module zip
   122  // file. SizeError is returned if that field is set. A FileErrorList is returned
   123  // if there are one or more invalid files. Other errors may be returned in the
   124  // future.
   125  func (cf CheckedFiles) Err() error {
   126  	if cf.SizeError != nil {
   127  		return cf.SizeError
   128  	}
   129  	if len(cf.Invalid) > 0 {
   130  		return FileErrorList(cf.Invalid)
   131  	}
   132  	return nil
   133  }
   134  
   135  type FileErrorList []FileError
   136  
   137  func (el FileErrorList) Error() string {
   138  	buf := &strings.Builder{}
   139  	sep := ""
   140  	for _, e := range el {
   141  		buf.WriteString(sep)
   142  		buf.WriteString(e.Error())
   143  		sep = "\n"
   144  	}
   145  	return buf.String()
   146  }
   147  
   148  type FileError struct {
   149  	Path string
   150  	Err  error
   151  }
   152  
   153  func (e FileError) Error() string {
   154  	return fmt.Sprintf("%s: %s", e.Path, e.Err)
   155  }
   156  
   157  func (e FileError) Unwrap() error {
   158  	return e.Err
   159  }
   160  
var (
	// Predefined error messages for invalid files. Not exhaustive.
	// checkFiles and checkZip record these in CheckedFiles.Invalid.
	// errGoModSize and errLICENSESize embed the MaxGoMod and MaxLICENSE
	// limits in their messages.
	errPathNotClean    = errors.New("file path is not clean")
	errPathNotRelative = errors.New("file path is not relative")
	errGoModCase       = errors.New("go.mod files must have lowercase names")
	errGoModSize       = fmt.Errorf("go.mod file too large (max size is %d bytes)", MaxGoMod)
	errLICENSESize     = fmt.Errorf("LICENSE file too large (max size is %d bytes)", MaxLICENSE)

	// Predefined error messages for omitted files. Not exhaustive.
	// checkFiles records errVendored, errSubmoduleFile, errHgArchivalTxt,
	// errSymlink, and errNotRegular in CheckedFiles.Omitted.
	// NOTE(review): errVCS and errSubmoduleDir are not used by checkFiles;
	// presumably the directory walker (listFilesInDir) reports them — confirm.
	errVCS           = errors.New("directory is a version control repository")
	errVendored      = errors.New("file is in vendor directory")
	errSubmoduleFile = errors.New("file is in another module")
	errSubmoduleDir  = errors.New("directory is in another module")
	errHgArchivalTxt = errors.New("file is inserted by 'hg archive' and is always omitted")
	errSymlink       = errors.New("file is a symbolic link")
	errNotRegular    = errors.New("not a regular file")
)
   178  
   179  // CheckFiles reports whether a list of files satisfy the name and size
   180  // constraints listed in the package documentation. The returned CheckedFiles
   181  // record contains lists of valid, invalid, and omitted files. Every file in
   182  // the given list will be included in exactly one of those lists.
   183  //
   184  // CheckFiles returns an error if the returned CheckedFiles does not describe
   185  // a valid module zip file (according to CheckedFiles.Err). The returned
   186  // CheckedFiles is still populated when an error is returned.
   187  //
   188  // Note that CheckFiles will not open any files, so Create may still fail when
   189  // CheckFiles is successful due to I/O errors and reported size differences.
   190  func CheckFiles(files []File) (CheckedFiles, error) {
   191  	cf, _, _ := checkFiles(files)
   192  	return cf, cf.Err()
   193  }
   194  
// checkFiles implements CheckFiles and also returns lists of valid files and
// their sizes, corresponding to cf.Valid. It omits files in submodules, files
// in vendored packages, symlinked files, and various other unwanted files.
//
// Each file is classified by the first per-file check it fails, so the order
// of the checks in the main loop decides whether a file is reported as omitted
// or invalid. Exceeding the total size limit sets cf.SizeError but does not by
// itself remove a file from the valid lists.
//
// The lists returned are used in Create to avoid repeated calls to File.Lstat.
func checkFiles(files []File) (cf CheckedFiles, validFiles []File, validSizes []int64) {
	// addError records err for path in cf.Omitted (omitted == true) or
	// cf.Invalid (omitted == false). Only the first error reported for a
	// given path is kept; later reports for the same path are dropped.
	errPaths := make(map[string]struct{})
	addError := func(path string, omitted bool, err error) {
		if _, ok := errPaths[path]; ok {
			return
		}
		errPaths[path] = struct{}{}
		fe := FileError{Path: path, Err: err}
		if omitted {
			cf.Omitted = append(cf.Omitted, fe)
		} else {
			cf.Invalid = append(cf.Invalid, fe)
		}
	}

	// Find directories containing go.mod files (other than the root).
	// Files in these directories will be omitted.
	// These directories will not be included in the output zip.
	//
	// The name match is case-insensitive, and only regular files count.
	// Keys are the directory part returned by path.Split, which keeps its
	// trailing slash; the module root is the empty string.
	haveGoMod := make(map[string]bool)
	for _, f := range files {
		p := f.Path()
		dir, base := path.Split(p)
		if strings.EqualFold(base, "go.mod") {
			info, err := f.Lstat()
			if err != nil {
				addError(p, false, err)
				continue
			}
			if info.Mode().IsRegular() {
				haveGoMod[dir] = true
			}
		}
	}

	// inSubmodule reports whether any non-root ancestor directory of p
	// contains a go.mod file. It walks upward one directory at a time and
	// stops at the root (dir == ""), so a go.mod in the root never counts.
	inSubmodule := func(p string) bool {
		for {
			dir, _ := path.Split(p)
			if dir == "" {
				return false
			}
			if haveGoMod[dir] {
				return true
			}
			// Drop the trailing slash so the next Split yields the parent.
			p = dir[:len(dir)-1]
		}
	}

	collisions := make(collisionChecker)
	// maxSize is the remaining size budget; it shrinks as files are accepted.
	maxSize := int64(MaxZipFile)
	for _, f := range files {
		p := f.Path()
		if p != path.Clean(p) {
			addError(p, false, errPathNotClean)
			continue
		}
		if path.IsAbs(p) {
			addError(p, false, errPathNotRelative)
			continue
		}
		if isVendoredPackage(p) {
			// Skip files in vendored packages.
			addError(p, true, errVendored)
			continue
		}
		if inSubmodule(p) {
			// Skip submodule files.
			addError(p, true, errSubmoduleFile)
			continue
		}
		if p == ".hg_archival.txt" {
			// Inserted by hg archive.
			// The go command drops this regardless of the VCS being used.
			addError(p, true, errHgArchivalTxt)
			continue
		}
		if err := module.CheckFilePath(p); err != nil {
			addError(p, false, err)
			continue
		}
		// A root-level go.mod must be spelled exactly "go.mod".
		if strings.ToLower(p) == "go.mod" && p != "go.mod" {
			addError(p, false, errGoModCase)
			continue
		}
		info, err := f.Lstat()
		if err != nil {
			addError(p, false, err)
			continue
		}
		// The collision check runs before the symlink and irregular-file
		// checks, so omitted entries still register their names.
		if err := collisions.check(p, info.IsDir()); err != nil {
			addError(p, false, err)
			continue
		}
		if info.Mode()&os.ModeType == os.ModeSymlink {
			// Skip symbolic links (golang.org/issue/27093).
			addError(p, true, errSymlink)
			continue
		}
		if !info.Mode().IsRegular() {
			addError(p, true, errNotRegular)
			continue
		}
		size := info.Size()
		// Charge the file against the remaining budget. A negative size or
		// one that does not fit sets SizeError (first occurrence only).
		if size >= 0 && size <= maxSize {
			maxSize -= size
		} else if cf.SizeError == nil {
			cf.SizeError = fmt.Errorf("module source tree too large (max size is %d bytes)", MaxZipFile)
		}
		if p == "go.mod" && size > MaxGoMod {
			addError(p, false, errGoModSize)
			continue
		}
		if p == "LICENSE" && size > MaxLICENSE {
			addError(p, false, errLICENSESize)
			continue
		}

		// The three slices are appended together so they stay index-aligned.
		cf.Valid = append(cf.Valid, p)
		validFiles = append(validFiles, f)
		validSizes = append(validSizes, info.Size())
	}

	return cf, validFiles, validSizes
}
   323  
   324  // CheckDir reports whether the files in dir satisfy the name and size
   325  // constraints listed in the package documentation. The returned CheckedFiles
   326  // record contains lists of valid, invalid, and omitted files. If a directory is
   327  // omitted (for example, a nested module or vendor directory), it will appear in
   328  // the omitted list, but its files won't be listed.
   329  //
   330  // CheckDir returns an error if it encounters an I/O error or if the returned
   331  // CheckedFiles does not describe a valid module zip file (according to
   332  // CheckedFiles.Err). The returned CheckedFiles is still populated when such
   333  // an error is returned.
   334  //
   335  // Note that CheckDir will not open any files, so CreateFromDir may still fail
   336  // when CheckDir is successful due to I/O errors.
   337  func CheckDir(dir string) (CheckedFiles, error) {
   338  	// List files (as CreateFromDir would) and check which ones are omitted
   339  	// or invalid.
   340  	files, omitted, err := listFilesInDir(dir)
   341  	if err != nil {
   342  		return CheckedFiles{}, err
   343  	}
   344  	cf, cfErr := CheckFiles(files)
   345  	_ = cfErr // ignore this error; we'll generate our own after rewriting paths.
   346  
   347  	// Replace all paths with file system paths.
   348  	// Paths returned by CheckFiles will be slash-separated paths relative to dir.
   349  	// That's probably not appropriate for error messages.
   350  	for i := range cf.Valid {
   351  		cf.Valid[i] = filepath.Join(dir, cf.Valid[i])
   352  	}
   353  	cf.Omitted = append(cf.Omitted, omitted...)
   354  	for i := range cf.Omitted {
   355  		cf.Omitted[i].Path = filepath.Join(dir, cf.Omitted[i].Path)
   356  	}
   357  	for i := range cf.Invalid {
   358  		cf.Invalid[i].Path = filepath.Join(dir, cf.Invalid[i].Path)
   359  	}
   360  	return cf, cf.Err()
   361  }
   362  
   363  // CheckZip reports whether the files contained in a zip file satisfy the name
   364  // and size constraints listed in the package documentation.
   365  //
   366  // CheckZip returns an error if the returned CheckedFiles does not describe
   367  // a valid module zip file (according to CheckedFiles.Err). The returned
   368  // CheckedFiles is still populated when an error is returned. CheckZip will
   369  // also return an error if the module path or version is malformed or if it
   370  // encounters an error reading the zip file.
   371  //
   372  // Note that CheckZip does not read individual files, so Unzip may still fail
   373  // when CheckZip is successful due to I/O errors.
   374  func CheckZip(m module.Version, zipFile string) (CheckedFiles, error) {
   375  	f, err := os.Open(zipFile)
   376  	if err != nil {
   377  		return CheckedFiles{}, err
   378  	}
   379  	defer f.Close()
   380  	_, cf, err := checkZip(m, f)
   381  	return cf, err
   382  }
   383  
// checkZip implements CheckZip and also returns the *zip.Reader. This is
// used in Unzip to avoid redundant I/O.
//
// The returned error is non-nil if the module path or version is invalid, if
// f cannot be read as a zip archive, or if the resulting CheckedFiles fails
// CheckedFiles.Err. The *zip.Reader is nil when validation stops before the
// archive is opened.
func checkZip(m module.Version, f *os.File) (*zip.Reader, CheckedFiles, error) {
	// Make sure the module path and version are valid.
	if vers := module.CanonicalVersion(m.Version); vers != m.Version {
		return nil, CheckedFiles{}, fmt.Errorf("version %q is not canonical (should be %q)", m.Version, vers)
	}
	if err := module.Check(m.Path, m.Version); err != nil {
		return nil, CheckedFiles{}, err
	}

	// Check the total file size.
	info, err := f.Stat()
	if err != nil {
		return nil, CheckedFiles{}, err
	}
	zipSize := info.Size()
	if zipSize > MaxZipFile {
		// The archive itself is too large; report that without reading it.
		cf := CheckedFiles{SizeError: fmt.Errorf("module zip file is too large (%d bytes; limit is %d bytes)", zipSize, MaxZipFile)}
		return nil, cf, cf.Err()
	}

	// Check for valid file names, collisions.
	var cf CheckedFiles
	// addError records zf as invalid, using its full name within the archive.
	addError := func(zf *zip.File, err error) {
		cf.Invalid = append(cf.Invalid, FileError{Path: zf.Name, Err: err})
	}
	z, err := zip.NewReader(f, zipSize)
	if err != nil {
		return nil, CheckedFiles{}, err
	}
	prefix := fmt.Sprintf("%s@%s/", m.Path, m.Version)
	collisions := make(collisionChecker)
	// size accumulates the declared uncompressed size of accepted files.
	var size int64
	for _, zf := range z.File {
		if !strings.HasPrefix(zf.Name, prefix) {
			addError(zf, fmt.Errorf("path does not have prefix %q", prefix))
			continue
		}
		name := zf.Name[len(prefix):]
		if name == "" {
			// The entry is the "<module>@<version>/" directory itself.
			continue
		}
		// A trailing slash marks a directory entry; strip it so the name can
		// be validated like any other path.
		isDir := strings.HasSuffix(name, "/")
		if isDir {
			name = name[:len(name)-1]
		}
		if path.Clean(name) != name {
			addError(zf, errPathNotClean)
			continue
		}
		if err := module.CheckFilePath(name); err != nil {
			addError(zf, err)
			continue
		}
		if err := collisions.check(name, isDir); err != nil {
			addError(zf, err)
			continue
		}
		if isDir {
			// Directory entries are validated above but are neither counted
			// toward the size limit nor listed as valid files.
			continue
		}
		// go.mod (in any letter case) is allowed only in the module root and
		// only with the exact name "go.mod".
		if base := path.Base(name); strings.EqualFold(base, "go.mod") {
			if base != name {
				addError(zf, fmt.Errorf("go.mod file not in module root directory"))
				continue
			}
			if name != "go.mod" {
				addError(zf, errGoModCase)
				continue
			}
		}
		// The uint64 header value may convert to a negative int64; the
		// sz >= 0 test treats that as exceeding the limit.
		sz := int64(zf.UncompressedSize64)
		if sz >= 0 && MaxZipFile-size >= sz {
			size += sz
		} else if cf.SizeError == nil {
			cf.SizeError = fmt.Errorf("total uncompressed size of module contents too large (max size is %d bytes)", MaxZipFile)
		}
		if name == "go.mod" && sz > MaxGoMod {
			addError(zf, fmt.Errorf("go.mod file too large (max size is %d bytes)", MaxGoMod))
			continue
		}
		if name == "LICENSE" && sz > MaxLICENSE {
			addError(zf, fmt.Errorf("LICENSE file too large (max size is %d bytes)", MaxLICENSE))
			continue
		}
		// Valid entries are listed under their full name, prefix included.
		cf.Valid = append(cf.Valid, zf.Name)
	}

	return z, cf, cf.Err()
}
   475  
   476  // Create builds a zip archive for module m from an abstract list of files
   477  // and writes it to w.
   478  //
   479  // Create verifies the restrictions described in the package documentation
   480  // and should not produce an archive that Unzip cannot extract. Create does not
   481  // include files in the output archive if they don't belong in the module zip.
   482  // In particular, Create will not include files in modules found in
   483  // subdirectories, most files in vendor directories, or irregular files (such
   484  // as symbolic links) in the output archive.
   485  func Create(w io.Writer, m module.Version, files []File) (err error) {
   486  	defer func() {
   487  		if err != nil {
   488  			err = &zipError{verb: "create zip", err: err}
   489  		}
   490  	}()
   491  
   492  	// Check that the version is canonical, the module path is well-formed, and
   493  	// the major version suffix matches the major version.
   494  	if vers := module.CanonicalVersion(m.Version); vers != m.Version {
   495  		return fmt.Errorf("version %q is not canonical (should be %q)", m.Version, vers)
   496  	}
   497  	if err := module.Check(m.Path, m.Version); err != nil {
   498  		return err
   499  	}
   500  
   501  	// Check whether files are valid, not valid, or should be omitted.
   502  	// Also check that the valid files don't exceed the maximum size.
   503  	cf, validFiles, validSizes := checkFiles(files)
   504  	if err := cf.Err(); err != nil {
   505  		return err
   506  	}
   507  
   508  	// Create the module zip file.
   509  	zw := zip.NewWriter(w)
   510  	prefix := fmt.Sprintf("%s@%s/", m.Path, m.Version)
   511  
   512  	addFile := func(f File, path string, size int64) error {
   513  		rc, err := f.Open()
   514  		if err != nil {
   515  			return err
   516  		}
   517  		defer rc.Close()
   518  		w, err := zw.Create(prefix + path)
   519  		if err != nil {
   520  			return err
   521  		}
   522  		lr := &io.LimitedReader{R: rc, N: size + 1}
   523  		if _, err := io.Copy(w, lr); err != nil {
   524  			return err
   525  		}
   526  		if lr.N <= 0 {
   527  			return fmt.Errorf("file %q is larger than declared size", path)
   528  		}
   529  		return nil
   530  	}
   531  
   532  	for i, f := range validFiles {
   533  		p := f.Path()
   534  		size := validSizes[i]
   535  		if err := addFile(f, p, size); err != nil {
   536  			return err
   537  		}
   538  	}
   539  
   540  	return zw.Close()
   541  }
   542  
   543  // CreateFromDir creates a module zip file for module m from the contents of
   544  // a directory, dir. The zip content is written to w.
   545  //
   546  // CreateFromDir verifies the restrictions described in the package
   547  // documentation and should not produce an archive that Unzip cannot extract.
   548  // CreateFromDir does not include files in the output archive if they don't
   549  // belong in the module zip. In particular, CreateFromDir will not include
   550  // files in modules found in subdirectories, most files in vendor directories,
   551  // or irregular files (such as symbolic links) in the output archive.
   552  // Additionally, unlike Create, CreateFromDir will not include directories
   553  // named ".bzr", ".git", ".hg", or ".svn".
   554  func CreateFromDir(w io.Writer, m module.Version, dir string) (err error) {
   555  	defer func() {
   556  		if zerr, ok := err.(*zipError); ok {
   557  			zerr.path = dir
   558  		} else if err != nil {
   559  			err = &zipError{verb: "create zip from directory", path: dir, err: err}
   560  		}
   561  	}()
   562  
   563  	files, _, err := listFilesInDir(dir)
   564  	if err != nil {
   565  		return err
   566  	}
   567  
   568  	return Create(w, m, files)
   569  }
   570  
   571  // CreateFromVCS creates a module zip file for module m from the contents of a
   572  // VCS repository stored locally. The zip content is written to w.
   573  //
   574  // repoRoot must be an absolute path to the base of the repository, such as
   575  // "/Users/some-user/some-repo".
   576  //
   577  // revision is the revision of the repository to create the zip from. Examples
   578  // include HEAD or SHA sums for git repositories.
   579  //
   580  // subdir must be the relative path from the base of the repository, such as
   581  // "sub/dir". To create a zip from the base of the repository, pass an empty
   582  // string.
   583  //
   584  // If CreateFromVCS returns ErrUnrecognizedVCS, consider falling back to
   585  // CreateFromDir.
   586  func CreateFromVCS(w io.Writer, m module.Version, repoRoot, revision, subdir string) (err error) {
   587  	defer func() {
   588  		if zerr, ok := err.(*zipError); ok {
   589  			zerr.path = repoRoot
   590  		} else if err != nil {
   591  			err = &zipError{verb: "create zip from version control system", path: repoRoot, err: err}
   592  		}
   593  	}()
   594  
   595  	var filesToCreate []File
   596  
   597  	switch {
   598  	case isGitRepo(repoRoot):
   599  		files, err := filesInGitRepo(repoRoot, revision, subdir)
   600  		if err != nil {
   601  			return err
   602  		}
   603  
   604  		filesToCreate = files
   605  	default:
   606  		return &UnrecognizedVCSError{RepoRoot: repoRoot}
   607  	}
   608  
   609  	return Create(w, m, filesToCreate)
   610  }
   611  
   612  // UnrecognizedVCSError indicates that no recognized version control system was
   613  // found in the given directory.
   614  type UnrecognizedVCSError struct {
   615  	RepoRoot string
   616  }
   617  
   618  func (e *UnrecognizedVCSError) Error() string {
   619  	return fmt.Sprintf("could not find a recognized version control system at %q", e.RepoRoot)
   620  }
   621  
   622  // filesInGitRepo filters out any files that are git ignored in the directory.
   623  func filesInGitRepo(dir, rev, subdir string) ([]File, error) {
   624  	stderr := bytes.Buffer{}
   625  	stdout := bytes.Buffer{}
   626  
   627  	// Incredibly, git produces different archives depending on whether
   628  	// it is running on a Windows system or not, in an attempt to normalize
   629  	// text file line endings. Setting -c core.autocrlf=input means only
   630  	// translate files on the way into the repo, not on the way out (archive).
   631  	// The -c core.eol=lf should be unnecessary but set it anyway.
   632  	//
   633  	// Note: We use git archive to understand which files are actually included,
   634  	// ignoring things like .gitignore'd files. We could also use other
   635  	// techniques like git ls-files, but this approach most closely matches what
   636  	// the Go command does, which is beneficial.
   637  	//
   638  	// Note: some of this code copied from https://go.googlesource.com/go/+/refs/tags/go1.16.5/src/cmd/go/internal/modfetch/codehost/git.go#826.
   639  	cmd := exec.Command("git", "-c", "core.autocrlf=input", "-c", "core.eol=lf", "archive", "--format=zip", rev)
   640  	if subdir != "" {
   641  		cmd.Args = append(cmd.Args, subdir)
   642  	}
   643  	cmd.Dir = dir
   644  	cmd.Env = append(os.Environ(), "PWD="+dir)
   645  	cmd.Stdout = &stdout
   646  	cmd.Stderr = &stderr
   647  	if err := cmd.Run(); err != nil {
   648  		return nil, fmt.Errorf("error running `git archive`: %w, %s", err, stderr.String())
   649  	}
   650  
   651  	rawReader := bytes.NewReader(stdout.Bytes())
   652  	zipReader, err := zip.NewReader(rawReader, int64(stdout.Len()))
   653  	if err != nil {
   654  		return nil, err
   655  	}
   656  
   657  	var fs []File
   658  	for _, zf := range zipReader.File {
   659  		if !strings.HasPrefix(zf.Name, subdir) || strings.HasSuffix(zf.Name, "/") {
   660  			continue
   661  		}
   662  
   663  		n := strings.TrimPrefix(zf.Name, subdir)
   664  		if n == "" {
   665  			continue
   666  		}
   667  		n = strings.TrimPrefix(n, string(filepath.Separator))
   668  
   669  		fs = append(fs, zipFile{
   670  			name: n,
   671  			f:    zf,
   672  		})
   673  	}
   674  
   675  	return fs, nil
   676  }
   677  
   678  // isGitRepo reports whether the given directory is a git repo.
   679  func isGitRepo(dir string) bool {
   680  	stdout := &bytes.Buffer{}
   681  	cmd := exec.Command("git", "rev-parse", "--git-dir")
   682  	cmd.Dir = dir
   683  	cmd.Env = append(os.Environ(), "PWD="+dir)
   684  	cmd.Stdout = stdout
   685  	if err := cmd.Run(); err != nil {
   686  		return false
   687  	}
   688  	gitDir := strings.TrimSpace(stdout.String())
   689  	if !filepath.IsAbs(gitDir) {
   690  		gitDir = filepath.Join(dir, gitDir)
   691  	}
   692  	wantDir := filepath.Join(dir, ".git")
   693  	return wantDir == gitDir
   694  }
   695  
   696  type dirFile struct {
   697  	filePath, slashPath string
   698  	info                os.FileInfo
   699  }
   700  
   701  func (f dirFile) Path() string                 { return f.slashPath }
   702  func (f dirFile) Lstat() (os.FileInfo, error)  { return f.info, nil }
   703  func (f dirFile) Open() (io.ReadCloser, error) { return os.Open(f.filePath) }
   704  
   705  type zipFile struct {
   706  	name string
   707  	f    *zip.File
   708  }
   709  
   710  func (f zipFile) Path() string                 { return f.name }
   711  func (f zipFile) Lstat() (os.FileInfo, error)  { return f.f.FileInfo(), nil }
   712  func (f zipFile) Open() (io.ReadCloser, error) { return f.f.Open() }
   713  
   714  // isVendoredPackage attempts to report whether the given filename is contained
   715  // in a package whose import path contains (but does not end with) the component
   716  // "vendor".
   717  //
   718  // Unfortunately, isVendoredPackage reports false positives for files in any
   719  // non-top-level package whose import path ends in "vendor".
   720  func isVendoredPackage(name string) bool {
   721  	var i int
   722  	if strings.HasPrefix(name, "vendor/") {
   723  		i += len("vendor/")
   724  	} else if j := strings.Index(name, "/vendor/"); j >= 0 {
   725  		// This offset looks incorrect; this should probably be
   726  		//
   727  		// 	i = j + len("/vendor/")
   728  		//
   729  		// (See https://golang.org/issue/31562 and https://golang.org/issue/37397.)
   730  		// Unfortunately, we can't fix it without invalidating module checksums.
   731  		i += len("/vendor/")
   732  	} else {
   733  		return false
   734  	}
   735  	return strings.Contains(name[i:], "/")
   736  }
   737  
// Unzip extracts the contents of a module zip file to a directory.
//
// Unzip checks all restrictions listed in the package documentation and returns
// an error if the zip archive is not valid. In some cases, files may be written
// to dir before an error is returned (for example, if a file's uncompressed
// size does not match its declared size).
//
// dir may or may not exist: Unzip will create it and any missing parent
// directories if it doesn't exist. If dir exists, it must be empty.
//
// Any error returned is wrapped in a *zipError with the verb "unzip" and
// zipFile as its path.
func Unzip(dir string, m module.Version, zipFile string) (err error) {
	defer func() {
		if err != nil {
			err = &zipError{verb: "unzip", path: zipFile, err: err}
		}
	}()

	// Check that the directory is empty. Don't create it yet in case there's
	// an error reading the zip.
	// The ReadDir error is ignored: a directory that cannot be listed (for
	// example, because it does not exist) is treated the same as an empty one.
	if files, _ := ioutil.ReadDir(dir); len(files) > 0 {
		return fmt.Errorf("target directory %v exists and is not empty", dir)
	}

	// Open the zip and check that it satisfies all restrictions.
	f, err := os.Open(zipFile)
	if err != nil {
		return err
	}
	defer f.Close()
	z, cf, err := checkZip(m, f)
	if err != nil {
		return err
	}
	// checkZip already returns cf.Err() as its error, so this second check is
	// not expected to fire; it is kept as a safeguard.
	if err := cf.Err(); err != nil {
		return err
	}

	// Unzip, enforcing sizes declared in the zip file.
	prefix := fmt.Sprintf("%s@%s/", m.Path, m.Version)
	if err := os.MkdirAll(dir, 0777); err != nil {
		return err
	}
	for _, zf := range z.File {
		// checkZip rejects entries without the prefix, so slicing is safe.
		name := zf.Name[len(prefix):]
		if name == "" || strings.HasSuffix(name, "/") {
			// Directory entries are not extracted; parent directories are
			// created on demand below.
			continue
		}
		dst := filepath.Join(dir, name)
		if err := os.MkdirAll(filepath.Dir(dst), 0777); err != nil {
			return err
		}
		// O_EXCL makes the open fail if dst already exists. The file is
		// created with read-only permission bits (0444).
		w, err := os.OpenFile(dst, os.O_WRONLY|os.O_CREATE|os.O_EXCL, 0444)
		if err != nil {
			return err
		}
		r, err := zf.Open()
		if err != nil {
			w.Close()
			return err
		}
		// Read at most one byte more than the declared size. If the budget is
		// exhausted (lr.N <= 0), the entry holds more data than it declared.
		lr := &io.LimitedReader{R: r, N: int64(zf.UncompressedSize64) + 1}
		_, err = io.Copy(w, lr)
		r.Close()
		if err != nil {
			w.Close()
			return err
		}
		if err := w.Close(); err != nil {
			return err
		}
		if lr.N <= 0 {
			return fmt.Errorf("uncompressed size of file %s is larger than declared size (%d bytes)", zf.Name, zf.UncompressedSize64)
		}
	}

	return nil
}
   814  
   815  // collisionChecker finds case-insensitive name collisions and paths that
   816  // are listed as both files and directories.
   817  //
   818  // The keys of this map are processed with strToFold. pathInfo has the original
   819  // path for each folded path.
   820  type collisionChecker map[string]pathInfo
   821  
   822  type pathInfo struct {
   823  	path  string
   824  	isDir bool
   825  }
   826  
   827  func (cc collisionChecker) check(p string, isDir bool) error {
   828  	fold := strToFold(p)
   829  	if other, ok := cc[fold]; ok {
   830  		if p != other.path {
   831  			return fmt.Errorf("case-insensitive file name collision: %q and %q", other.path, p)
   832  		}
   833  		if isDir != other.isDir {
   834  			return fmt.Errorf("entry %q is both a file and a directory", p)
   835  		}
   836  		if !isDir {
   837  			return fmt.Errorf("multiple entries for file %q", p)
   838  		}
   839  		// It's not an error if check is called with the same directory multiple
   840  		// times. check is called recursively on parent directories, so check
   841  		// may be called on the same directory many times.
   842  	} else {
   843  		cc[fold] = pathInfo{path: p, isDir: isDir}
   844  	}
   845  
   846  	if parent := path.Dir(p); parent != "." {
   847  		return cc.check(parent, true)
   848  	}
   849  	return nil
   850  }
   851  
   852  // listFilesInDir walks the directory tree rooted at dir and returns a list of
   853  // files, as well as a list of directories and files that were skipped (for
   854  // example, nested modules and symbolic links).
   855  func listFilesInDir(dir string) (files []File, omitted []FileError, err error) {
   856  	err = filepath.Walk(dir, func(filePath string, info os.FileInfo, err error) error {
   857  		if err != nil {
   858  			return err
   859  		}
   860  		relPath, err := filepath.Rel(dir, filePath)
   861  		if err != nil {
   862  			return err
   863  		}
   864  		slashPath := filepath.ToSlash(relPath)
   865  
   866  		// Skip some subdirectories inside vendor, but maintain bug
   867  		// golang.org/issue/31562, described in isVendoredPackage.
   868  		// We would like Create and CreateFromDir to produce the same result
   869  		// for a set of files, whether expressed as a directory tree or zip.
   870  		if isVendoredPackage(slashPath) {
   871  			omitted = append(omitted, FileError{Path: slashPath, Err: errVendored})
   872  			return nil
   873  		}
   874  
   875  		if info.IsDir() {
   876  			if filePath == dir {
   877  				// Don't skip the top-level directory.
   878  				return nil
   879  			}
   880  
   881  			// Skip VCS directories.
   882  			// fossil repos are regular files with arbitrary names, so we don't try
   883  			// to exclude them.
   884  			switch filepath.Base(filePath) {
   885  			case ".bzr", ".git", ".hg", ".svn":
   886  				omitted = append(omitted, FileError{Path: slashPath, Err: errVCS})
   887  				return filepath.SkipDir
   888  			}
   889  
   890  			// Skip submodules (directories containing go.mod files).
   891  			if goModInfo, err := os.Lstat(filepath.Join(filePath, "go.mod")); err == nil && !goModInfo.IsDir() {
   892  				omitted = append(omitted, FileError{Path: slashPath, Err: errSubmoduleDir})
   893  				return filepath.SkipDir
   894  			}
   895  			return nil
   896  		}
   897  
   898  		// Skip irregular files and files in vendor directories.
   899  		// Irregular files are ignored. They're typically symbolic links.
   900  		if !info.Mode().IsRegular() {
   901  			omitted = append(omitted, FileError{Path: slashPath, Err: errNotRegular})
   902  			return nil
   903  		}
   904  
   905  		files = append(files, dirFile{
   906  			filePath:  filePath,
   907  			slashPath: slashPath,
   908  			info:      info,
   909  		})
   910  		return nil
   911  	})
   912  	if err != nil {
   913  		return nil, nil, err
   914  	}
   915  	return files, omitted, nil
   916  }
   917  
   918  type zipError struct {
   919  	verb, path string
   920  	err        error
   921  }
   922  
   923  func (e *zipError) Error() string {
   924  	if e.path == "" {
   925  		return fmt.Sprintf("%s: %v", e.verb, e.err)
   926  	} else {
   927  		return fmt.Sprintf("%s %s: %v", e.verb, e.path, e.err)
   928  	}
   929  }
   930  
   931  func (e *zipError) Unwrap() error {
   932  	return e.err
   933  }
   934  
   935  // strToFold returns a string with the property that
   936  //
   937  //	strings.EqualFold(s, t) iff strToFold(s) == strToFold(t)
   938  //
   939  // This lets us test a large set of strings for fold-equivalent
   940  // duplicates without making a quadratic number of calls
   941  // to EqualFold. Note that strings.ToUpper and strings.ToLower
   942  // do not have the desired property in some corner cases.
   943  func strToFold(s string) string {
   944  	// Fast path: all ASCII, no upper case.
   945  	// Most paths look like this already.
   946  	for i := 0; i < len(s); i++ {
   947  		c := s[i]
   948  		if c >= utf8.RuneSelf || 'A' <= c && c <= 'Z' {
   949  			goto Slow
   950  		}
   951  	}
   952  	return s
   953  
   954  Slow:
   955  	var buf bytes.Buffer
   956  	for _, r := range s {
   957  		// SimpleFold(x) cycles to the next equivalent rune > x
   958  		// or wraps around to smaller values. Iterate until it wraps,
   959  		// and we've found the minimum value.
   960  		for {
   961  			r0 := r
   962  			r = unicode.SimpleFold(r0)
   963  			if r <= r0 {
   964  				break
   965  			}
   966  		}
   967  		// Exception to allow fast path above: A-Z => a-z
   968  		if 'A' <= r && r <= 'Z' {
   969  			r += 'a' - 'A'
   970  		}
   971  		buf.WriteRune(r)
   972  	}
   973  	return buf.String()
   974  }
   975  

View as plain text