read.go

Documentation: golang.org/x/mod/modfile

     1  // Copyright 2018 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package modfile
     6  
     7  import (
     8  	"bytes"
     9  	"errors"
    10  	"fmt"
    11  	"os"
    12  	"strconv"
    13  	"strings"
    14  	"unicode"
    15  	"unicode/utf8"
    16  )
    17  
    18  // A Position describes an arbitrary source position in a file, including the
    19  // file, line, column, and byte offset.
    20  type Position struct {
    21  	Line     int // line in input (starting at 1)
    22  	LineRune int // rune in line (starting at 1)
    23  	Byte     int // byte in input (starting at 0)
    24  }
    25  
    26  // add returns the position at the end of s, assuming it starts at p.
    27  func (p Position) add(s string) Position {
    28  	p.Byte += len(s)
    29  	if n := strings.Count(s, "\n"); n > 0 {
    30  		p.Line += n
    31  		s = s[strings.LastIndex(s, "\n")+1:]
    32  		p.LineRune = 1
    33  	}
    34  	p.LineRune += utf8.RuneCountInString(s)
    35  	return p
    36  }
    37  
// An Expr represents an input element.
// Every syntax node type in this file implements it.
type Expr interface {
	// Span returns the start and end position of the expression,
	// excluding leading or trailing comments.
	Span() (start, end Position)

	// Comment returns the comments attached to the expression.
	// This method would normally be named 'Comments' but that
	// would interfere with embedding a type of the same name.
	Comment() *Comments
}
    49  
// A Comment represents a single // comment.
type Comment struct {
	Start  Position // position of the comment's first byte
	Token  string   // without trailing newline
	Suffix bool     // an end of line (not whole line) comment
}
    56  
// Comments collects the comments associated with an expression.
type Comments struct {
	Before []Comment // whole-line comments before this expression
	Suffix []Comment // end-of-line comments after this expression

	// For top-level expressions only, After lists whole-line
	// comments following the expression.
	After []Comment
}

// Comment returns the receiver. This isn't useful by itself, but
// a Comments struct is embedded into all the expression
// implementation types, and this gives each of those a Comment
// method to satisfy the Expr interface.
func (c *Comments) Comment() *Comments {
	return c
}
    74  
// A FileSyntax represents an entire go.mod file.
type FileSyntax struct {
	Name string // file path
	Comments
	Stmt []Expr // top-level statements, in file order
}
    81  
    82  func (x *FileSyntax) Span() (start, end Position) {
    83  	if len(x.Stmt) == 0 {
    84  		return
    85  	}
    86  	start, _ = x.Stmt[0].Span()
    87  	_, end = x.Stmt[len(x.Stmt)-1].Span()
    88  	return start, end
    89  }
    90  
    91  // addLine adds a line containing the given tokens to the file.
    92  //
    93  // If the first token of the hint matches the first token of the
    94  // line, the new line is added at the end of the block containing hint,
    95  // extracting hint into a new block if it is not yet in one.
    96  //
    97  // If the hint is non-nil buts its first token does not match,
    98  // the new line is added after the block containing hint
    99  // (or hint itself, if not in a block).
   100  //
   101  // If no hint is provided, addLine appends the line to the end of
   102  // the last block with a matching first token,
   103  // or to the end of the file if no such block exists.
   104  func (x *FileSyntax) addLine(hint Expr, tokens ...string) *Line {
   105  	if hint == nil {
   106  		// If no hint given, add to the last statement of the given type.
   107  	Loop:
   108  		for i := len(x.Stmt) - 1; i >= 0; i-- {
   109  			stmt := x.Stmt[i]
   110  			switch stmt := stmt.(type) {
   111  			case *Line:
   112  				if stmt.Token != nil && stmt.Token[0] == tokens[0] {
   113  					hint = stmt
   114  					break Loop
   115  				}
   116  			case *LineBlock:
   117  				if stmt.Token[0] == tokens[0] {
   118  					hint = stmt
   119  					break Loop
   120  				}
   121  			}
   122  		}
   123  	}
   124  
   125  	newLineAfter := func(i int) *Line {
   126  		new := &Line{Token: tokens}
   127  		if i == len(x.Stmt) {
   128  			x.Stmt = append(x.Stmt, new)
   129  		} else {
   130  			x.Stmt = append(x.Stmt, nil)
   131  			copy(x.Stmt[i+2:], x.Stmt[i+1:])
   132  			x.Stmt[i+1] = new
   133  		}
   134  		return new
   135  	}
   136  
   137  	if hint != nil {
   138  		for i, stmt := range x.Stmt {
   139  			switch stmt := stmt.(type) {
   140  			case *Line:
   141  				if stmt == hint {
   142  					if stmt.Token == nil || stmt.Token[0] != tokens[0] {
   143  						return newLineAfter(i)
   144  					}
   145  
   146  					// Convert line to line block.
   147  					stmt.InBlock = true
   148  					block := &LineBlock{Token: stmt.Token[:1], Line: []*Line{stmt}}
   149  					stmt.Token = stmt.Token[1:]
   150  					x.Stmt[i] = block
   151  					new := &Line{Token: tokens[1:], InBlock: true}
   152  					block.Line = append(block.Line, new)
   153  					return new
   154  				}
   155  
   156  			case *LineBlock:
   157  				if stmt == hint {
   158  					if stmt.Token[0] != tokens[0] {
   159  						return newLineAfter(i)
   160  					}
   161  
   162  					new := &Line{Token: tokens[1:], InBlock: true}
   163  					stmt.Line = append(stmt.Line, new)
   164  					return new
   165  				}
   166  
   167  				for j, line := range stmt.Line {
   168  					if line == hint {
   169  						if stmt.Token[0] != tokens[0] {
   170  							return newLineAfter(i)
   171  						}
   172  
   173  						// Add new line after hint within the block.
   174  						stmt.Line = append(stmt.Line, nil)
   175  						copy(stmt.Line[j+2:], stmt.Line[j+1:])
   176  						new := &Line{Token: tokens[1:], InBlock: true}
   177  						stmt.Line[j+1] = new
   178  						return new
   179  					}
   180  				}
   181  			}
   182  		}
   183  	}
   184  
   185  	new := &Line{Token: tokens}
   186  	x.Stmt = append(x.Stmt, new)
   187  	return new
   188  }
   189  
   190  func (x *FileSyntax) updateLine(line *Line, tokens ...string) {
   191  	if line.InBlock {
   192  		tokens = tokens[1:]
   193  	}
   194  	line.Token = tokens
   195  }
   196  
   197  // markRemoved modifies line so that it (and its end-of-line comment, if any)
   198  // will be dropped by (*FileSyntax).Cleanup.
   199  func (line *Line) markRemoved() {
   200  	line.Token = nil
   201  	line.Comments.Suffix = nil
   202  }
   203  
   204  // Cleanup cleans up the file syntax x after any edit operations.
   205  // To avoid quadratic behavior, (*Line).markRemoved marks the line as dead
   206  // by setting line.Token = nil but does not remove it from the slice
   207  // in which it appears. After edits have all been indicated,
   208  // calling Cleanup cleans out the dead lines.
   209  func (x *FileSyntax) Cleanup() {
   210  	w := 0
   211  	for _, stmt := range x.Stmt {
   212  		switch stmt := stmt.(type) {
   213  		case *Line:
   214  			if stmt.Token == nil {
   215  				continue
   216  			}
   217  		case *LineBlock:
   218  			ww := 0
   219  			for _, line := range stmt.Line {
   220  				if line.Token != nil {
   221  					stmt.Line[ww] = line
   222  					ww++
   223  				}
   224  			}
   225  			if ww == 0 {
   226  				continue
   227  			}
   228  			if ww == 1 {
   229  				// Collapse block into single line.
   230  				line := &Line{
   231  					Comments: Comments{
   232  						Before: commentsAdd(stmt.Before, stmt.Line[0].Before),
   233  						Suffix: commentsAdd(stmt.Line[0].Suffix, stmt.Suffix),
   234  						After:  commentsAdd(stmt.Line[0].After, stmt.After),
   235  					},
   236  					Token: stringsAdd(stmt.Token, stmt.Line[0].Token),
   237  				}
   238  				x.Stmt[w] = line
   239  				w++
   240  				continue
   241  			}
   242  			stmt.Line = stmt.Line[:ww]
   243  		}
   244  		x.Stmt[w] = stmt
   245  		w++
   246  	}
   247  	x.Stmt = x.Stmt[:w]
   248  }
   249  
   250  func commentsAdd(x, y []Comment) []Comment {
   251  	return append(x[:len(x):len(x)], y...)
   252  }
   253  
   254  func stringsAdd(x, y []string) []string {
   255  	return append(x[:len(x):len(x)], y...)
   256  }
   257  
   258  // A CommentBlock represents a top-level block of comments separate
   259  // from any rule.
   260  type CommentBlock struct {
   261  	Comments
   262  	Start Position
   263  }
   264  
   265  func (x *CommentBlock) Span() (start, end Position) {
   266  	return x.Start, x.Start
   267  }
   268  
   269  // A Line is a single line of tokens.
   270  type Line struct {
   271  	Comments
   272  	Start   Position
   273  	Token   []string
   274  	InBlock bool
   275  	End     Position
   276  }
   277  
   278  func (x *Line) Span() (start, end Position) {
   279  	return x.Start, x.End
   280  }
   281  
   282  // A LineBlock is a factored block of lines, like
   283  //
   284  //	require (
   285  //		"x"
   286  //		"y"
   287  //	)
   288  type LineBlock struct {
   289  	Comments
   290  	Start  Position
   291  	LParen LParen
   292  	Token  []string
   293  	Line   []*Line
   294  	RParen RParen
   295  }
   296  
   297  func (x *LineBlock) Span() (start, end Position) {
   298  	return x.Start, x.RParen.Pos.add(")")
   299  }
   300  
   301  // An LParen represents the beginning of a parenthesized line block.
   302  // It is a place to store suffix comments.
   303  type LParen struct {
   304  	Comments
   305  	Pos Position
   306  }
   307  
   308  func (x *LParen) Span() (start, end Position) {
   309  	return x.Pos, x.Pos.add(")")
   310  }
   311  
   312  // An RParen represents the end of a parenthesized line block.
   313  // It is a place to store whole-line (before) comments.
   314  type RParen struct {
   315  	Comments
   316  	Pos Position
   317  }
   318  
   319  func (x *RParen) Span() (start, end Position) {
   320  	return x.Pos, x.Pos.add(")")
   321  }
   322  
// An input represents a single input file being parsed.
// It holds the lexer state, the parser's results, and the scratch lists
// used when attaching comments to the syntax tree.
type input struct {
	// Lexing state.
	filename   string    // name of input file, for errors
	complete   []byte    // entire input
	remaining  []byte    // remaining input
	tokenStart []byte    // token being scanned to end of input
	token      token     // next token to be returned by lex, peek
	pos        Position  // current input position
	comments   []Comment // accumulated comments

	// Parser state.
	file        *FileSyntax // returned top-level syntax tree
	parseErrors ErrorList   // errors encountered during parsing

	// Comment assignment state.
	pre  []Expr // all expressions, in preorder traversal
	post []Expr // all expressions, in postorder traversal
}
   342  
   343  func newInput(filename string, data []byte) *input {
   344  	return &input{
   345  		filename:  filename,
   346  		complete:  data,
   347  		remaining: data,
   348  		pos:       Position{Line: 1, LineRune: 1, Byte: 0},
   349  	}
   350  }
   351  
   352  // parse parses the input file.
   353  func parse(file string, data []byte) (f *FileSyntax, err error) {
   354  	// The parser panics for both routine errors like syntax errors
   355  	// and for programmer bugs like array index errors.
   356  	// Turn both into error returns. Catching bug panics is
   357  	// especially important when processing many files.
   358  	in := newInput(file, data)
   359  	defer func() {
   360  		if e := recover(); e != nil && e != &in.parseErrors {
   361  			in.parseErrors = append(in.parseErrors, Error{
   362  				Filename: in.filename,
   363  				Pos:      in.pos,
   364  				Err:      fmt.Errorf("internal error: %v", e),
   365  			})
   366  		}
   367  		if err == nil && len(in.parseErrors) > 0 {
   368  			err = in.parseErrors
   369  		}
   370  	}()
   371  
   372  	// Prime the lexer by reading in the first token. It will be available
   373  	// in the next peek() or lex() call.
   374  	in.readToken()
   375  
   376  	// Invoke the parser.
   377  	in.parseFile()
   378  	if len(in.parseErrors) > 0 {
   379  		return nil, in.parseErrors
   380  	}
   381  	in.file.Name = in.filename
   382  
   383  	// Assign comments to nearby syntax.
   384  	in.assignComments()
   385  
   386  	return in.file, nil
   387  }
   388  
   389  // Error is called to report an error.
   390  // Error does not return: it panics.
   391  func (in *input) Error(s string) {
   392  	in.parseErrors = append(in.parseErrors, Error{
   393  		Filename: in.filename,
   394  		Pos:      in.pos,
   395  		Err:      errors.New(s),
   396  	})
   397  	panic(&in.parseErrors)
   398  }
   399  
   400  // eof reports whether the input has reached end of file.
   401  func (in *input) eof() bool {
   402  	return len(in.remaining) == 0
   403  }
   404  
   405  // peekRune returns the next rune in the input without consuming it.
   406  func (in *input) peekRune() int {
   407  	if len(in.remaining) == 0 {
   408  		return 0
   409  	}
   410  	r, _ := utf8.DecodeRune(in.remaining)
   411  	return int(r)
   412  }
   413  
   414  // peekPrefix reports whether the remaining input begins with the given prefix.
   415  func (in *input) peekPrefix(prefix string) bool {
   416  	// This is like bytes.HasPrefix(in.remaining, []byte(prefix))
   417  	// but without the allocation of the []byte copy of prefix.
   418  	for i := 0; i < len(prefix); i++ {
   419  		if i >= len(in.remaining) || in.remaining[i] != prefix[i] {
   420  			return false
   421  		}
   422  	}
   423  	return true
   424  }
   425  
   426  // readRune consumes and returns the next rune in the input.
   427  func (in *input) readRune() int {
   428  	if len(in.remaining) == 0 {
   429  		in.Error("internal lexer error: readRune at EOF")
   430  	}
   431  	r, size := utf8.DecodeRune(in.remaining)
   432  	in.remaining = in.remaining[size:]
   433  	if r == '\n' {
   434  		in.pos.Line++
   435  		in.pos.LineRune = 1
   436  	} else {
   437  		in.pos.LineRune++
   438  	}
   439  	in.pos.Byte += size
   440  	return int(r)
   441  }
   442  
// A token is a single lexical element produced by the lexer.
type token struct {
	kind   tokenKind // token class, or the ASCII code for newline/punctuation
	pos    Position  // position where the token starts
	endPos Position  // input position when the token was finished
	text   string    // token text as it appeared in the input
}
   449  
   450  type tokenKind int
   451  
   452  const (
   453  	_EOF tokenKind = -(iota + 1)
   454  	_EOLCOMMENT
   455  	_IDENT
   456  	_STRING
   457  	_COMMENT
   458  
   459  	// newlines and punctuation tokens are allowed as ASCII codes.
   460  )
   461  
   462  func (k tokenKind) isComment() bool {
   463  	return k == _COMMENT || k == _EOLCOMMENT
   464  }
   465  
   466  // isEOL returns whether a token terminates a line.
   467  func (k tokenKind) isEOL() bool {
   468  	return k == _EOF || k == _EOLCOMMENT || k == '\n'
   469  }
   470  
   471  // startToken marks the beginning of the next input token.
   472  // It must be followed by a call to endToken, once the token's text has
   473  // been consumed using readRune.
   474  func (in *input) startToken() {
   475  	in.tokenStart = in.remaining
   476  	in.token.text = ""
   477  	in.token.pos = in.pos
   478  }
   479  
   480  // endToken marks the end of an input token.
   481  // It records the actual token string in tok.text.
   482  // A single trailing newline (LF or CRLF) will be removed from comment tokens.
   483  func (in *input) endToken(kind tokenKind) {
   484  	in.token.kind = kind
   485  	text := string(in.tokenStart[:len(in.tokenStart)-len(in.remaining)])
   486  	if kind.isComment() {
   487  		if strings.HasSuffix(text, "\r\n") {
   488  			text = text[:len(text)-2]
   489  		} else {
   490  			text = strings.TrimSuffix(text, "\n")
   491  		}
   492  	}
   493  	in.token.text = text
   494  	in.token.endPos = in.pos
   495  }
   496  
   497  // peek returns the kind of the next token returned by lex.
   498  func (in *input) peek() tokenKind {
   499  	return in.token.kind
   500  }
   501  
   502  // lex is called from the parser to obtain the next input token.
   503  func (in *input) lex() token {
   504  	tok := in.token
   505  	in.readToken()
   506  	return tok
   507  }
   508  
// readToken lexes the next token from the text and stores it in in.token.
// It produces one of: a comment token (_COMMENT for a comment alone on its
// line, _EOLCOMMENT for a comment that follows other text), _EOF, a
// newline or punctuation token whose kind is its ASCII code, a _STRING,
// or an _IDENT. Lexical errors are reported through in.Error, which panics.
func (in *input) readToken() {
	// Skip past spaces, stopping at non-space or EOF.
	for !in.eof() {
		c := in.peekRune()
		if c == ' ' || c == '\t' || c == '\r' {
			in.readRune()
			continue
		}

		// Comment runs to end of line.
		if in.peekPrefix("//") {
			in.startToken()

			// Is this comment the only thing on its line?
			// Find the last \n before this // and see if it's all
			// spaces from there to here.
			i := bytes.LastIndex(in.complete[:in.pos.Byte], []byte("\n"))
			suffix := len(bytes.TrimSpace(in.complete[i+1:in.pos.Byte])) > 0
			in.readRune()
			in.readRune()

			// Consume comment, including the terminating newline if present.
			for len(in.remaining) > 0 && in.readRune() != '\n' {
			}

			// A comment that is alone on its line is handed to the parser
			// as a _COMMENT token; the parsing functions handle those
			// whole-line comments themselves.
			if !suffix {
				in.endToken(_COMMENT)
				return
			}

			// Otherwise, save comment for later attachment to syntax tree.
			in.endToken(_EOLCOMMENT)
			in.comments = append(in.comments, Comment{in.token.pos, in.token.text, suffix})
			return
		}

		if in.peekPrefix("/*") {
			in.Error("mod files must use // comments (not /* */ comments)")
		}

		// Found non-space non-comment.
		break
	}

	// Found the beginning of the next token.
	in.startToken()

	// End of file.
	if in.eof() {
		in.endToken(_EOF)
		return
	}

	// Punctuation tokens.
	switch c := in.peekRune(); c {
	case '\n', '(', ')', '[', ']', '{', '}', ',':
		in.readRune()
		in.endToken(tokenKind(c))
		return

	case '"', '`': // quoted string
		quote := c
		in.readRune()
		for {
			if in.eof() {
				// Report the error at the start of the string token.
				in.pos = in.token.pos
				in.Error("unexpected EOF in string")
			}
			if in.peekRune() == '\n' {
				in.Error("unexpected newline in string")
			}
			c := in.readRune()
			if c == quote {
				break
			}
			// In a double-quoted string a backslash escapes the next rune,
			// so consume that rune without testing it against the quote.
			if c == '\\' && quote != '`' {
				if in.eof() {
					in.pos = in.token.pos
					in.Error("unexpected EOF in string")
				}
				in.readRune()
			}
		}
		in.endToken(_STRING)
		return
	}

	// Checked all punctuation. Must be identifier token.
	if c := in.peekRune(); !isIdent(c) {
		in.Error(fmt.Sprintf("unexpected input character %#q", c))
	}

	// Scan over identifier.
	for isIdent(in.peekRune()) {
		// '/' is an identifier rune, so a comment start must be checked
		// for explicitly; "//" ends the identifier.
		if in.peekPrefix("//") {
			break
		}
		if in.peekPrefix("/*") {
			in.Error("mod files must use // comments (not /* */ comments)")
		}
		in.readRune()
	}
	in.endToken(_IDENT)
}
   617  
   618  // isIdent reports whether c is an identifier rune.
   619  // We treat most printable runes as identifier runes, except for a handful of
   620  // ASCII punctuation characters.
   621  func isIdent(c int) bool {
   622  	switch r := rune(c); r {
   623  	case ' ', '(', ')', '[', ']', '{', '}', ',':
   624  		return false
   625  	default:
   626  		return !unicode.IsSpace(r) && unicode.IsPrint(r)
   627  	}
   628  }
   629  
   630  // Comment assignment.
   631  // We build two lists of all subexpressions, preorder and postorder.
   632  // The preorder list is ordered by start location, with outer expressions first.
   633  // The postorder list is ordered by end location, with outer expressions last.
   634  // We use the preorder list to assign each whole-line comment to the syntax
   635  // immediately following it, and we use the postorder list to assign each
   636  // end-of-line comment to the syntax immediately preceding it.
   637  
   638  // order walks the expression adding it and its subexpressions to the
   639  // preorder and postorder lists.
   640  func (in *input) order(x Expr) {
   641  	if x != nil {
   642  		in.pre = append(in.pre, x)
   643  	}
   644  	switch x := x.(type) {
   645  	default:
   646  		panic(fmt.Errorf("order: unexpected type %T", x))
   647  	case nil:
   648  		// nothing
   649  	case *LParen, *RParen:
   650  		// nothing
   651  	case *CommentBlock:
   652  		// nothing
   653  	case *Line:
   654  		// nothing
   655  	case *FileSyntax:
   656  		for _, stmt := range x.Stmt {
   657  			in.order(stmt)
   658  		}
   659  	case *LineBlock:
   660  		in.order(&x.LParen)
   661  		for _, l := range x.Line {
   662  			in.order(l)
   663  		}
   664  		in.order(&x.RParen)
   665  	}
   666  	if x != nil {
   667  		in.post = append(in.post, x)
   668  	}
   669  }
   670  
// assignComments attaches the comments collected in in.comments to
// nearby syntax: whole-line comments go to the Before list of the
// expression that follows them, and end-of-line comments go to the Suffix
// list of the expression that precedes them. Leftover whole-line comments
// are added to the file's After list and leftover suffix comments to the
// file's Before list.
func (in *input) assignComments() {
	// Flip to true to trace the assignment on standard error.
	const debug = false

	// Generate preorder and postorder lists.
	in.order(in.file)

	// Split into whole-line comments and suffix comments.
	// NOTE(review): the only append to in.comments visible in this file
	// is for end-of-line comments, so line is presumably always empty
	// here — confirm before relying on the whole-line path.
	var line, suffix []Comment
	for _, com := range in.comments {
		if com.Suffix {
			suffix = append(suffix, com)
		} else {
			line = append(line, com)
		}
	}

	if debug {
		for _, c := range line {
			fmt.Fprintf(os.Stderr, "LINE %q :%d:%d #%d\n", c.Token, c.Start.Line, c.Start.LineRune, c.Start.Byte)
		}
	}

	// Assign line comments to syntax immediately following.
	for _, x := range in.pre {
		start, _ := x.Span()
		if debug {
			fmt.Fprintf(os.Stderr, "pre %T :%d:%d #%d\n", x, start.Line, start.LineRune, start.Byte)
		}
		xcom := x.Comment()
		// Take every pending comment that starts at or before x.
		for len(line) > 0 && start.Byte >= line[0].Start.Byte {
			if debug {
				fmt.Fprintf(os.Stderr, "ASSIGN LINE %q #%d\n", line[0].Token, line[0].Start.Byte)
			}
			xcom.Before = append(xcom.Before, line[0])
			line = line[1:]
		}
	}

	// Remaining line comments go at end of file.
	in.file.After = append(in.file.After, line...)

	if debug {
		for _, c := range suffix {
			fmt.Fprintf(os.Stderr, "SUFFIX %q :%d:%d #%d\n", c.Token, c.Start.Line, c.Start.LineRune, c.Start.Byte)
		}
	}

	// Assign suffix comments to syntax immediately before.
	// Both the postorder list and the suffix list are consumed from the back.
	for i := len(in.post) - 1; i >= 0; i-- {
		x := in.post[i]

		start, end := x.Span()
		if debug {
			fmt.Fprintf(os.Stderr, "post %T :%d:%d #%d :%d:%d #%d\n", x, start.Line, start.LineRune, start.Byte, end.Line, end.LineRune, end.Byte)
		}

		// Do not assign suffix comments to end of line block or whole file.
		// Instead assign them to the last element inside.
		switch x.(type) {
		case *FileSyntax:
			continue
		}

		// Do not assign suffix comments to something that starts
		// on an earlier line, so that in
		//
		//	x ( y
		//		z ) // comment
		//
		// we assign the comment to z and not to x ( ... ).
		if start.Line != end.Line {
			continue
		}
		xcom := x.Comment()
		// Take every pending comment that starts at or after the end of x.
		for len(suffix) > 0 && end.Byte <= suffix[len(suffix)-1].Start.Byte {
			if debug {
				fmt.Fprintf(os.Stderr, "ASSIGN SUFFIX %q #%d\n", suffix[len(suffix)-1].Token, suffix[len(suffix)-1].Start.Byte)
			}
			xcom.Suffix = append(xcom.Suffix, suffix[len(suffix)-1])
			suffix = suffix[:len(suffix)-1]
		}
	}

	// We assigned suffix comments in reverse.
	// If multiple suffix comments were appended to the same
	// expression node, they are now in reverse. Fix that.
	for _, x := range in.post {
		reverseComments(x.Comment().Suffix)
	}

	// Remaining suffix comments go at beginning of file.
	in.file.Before = append(in.file.Before, suffix...)
}
   765  
   766  // reverseComments reverses the []Comment list.
   767  func reverseComments(list []Comment) {
   768  	for i, j := 0, len(list)-1; i < j; i, j = i+1, j-1 {
   769  		list[i], list[j] = list[j], list[i]
   770  	}
   771  }
   772  
   773  func (in *input) parseFile() {
   774  	in.file = new(FileSyntax)
   775  	var cb *CommentBlock
   776  	for {
   777  		switch in.peek() {
   778  		case '\n':
   779  			in.lex()
   780  			if cb != nil {
   781  				in.file.Stmt = append(in.file.Stmt, cb)
   782  				cb = nil
   783  			}
   784  		case _COMMENT:
   785  			tok := in.lex()
   786  			if cb == nil {
   787  				cb = &CommentBlock{Start: tok.pos}
   788  			}
   789  			com := cb.Comment()
   790  			com.Before = append(com.Before, Comment{Start: tok.pos, Token: tok.text})
   791  		case _EOF:
   792  			if cb != nil {
   793  				in.file.Stmt = append(in.file.Stmt, cb)
   794  			}
   795  			return
   796  		default:
   797  			in.parseStmt()
   798  			if cb != nil {
   799  				in.file.Stmt[len(in.file.Stmt)-1].Comment().Before = cb.Before
   800  				cb = nil
   801  			}
   802  		}
   803  	}
   804  }
   805  
   806  func (in *input) parseStmt() {
   807  	tok := in.lex()
   808  	start := tok.pos
   809  	end := tok.endPos
   810  	tokens := []string{tok.text}
   811  	for {
   812  		tok := in.lex()
   813  		switch {
   814  		case tok.kind.isEOL():
   815  			in.file.Stmt = append(in.file.Stmt, &Line{
   816  				Start: start,
   817  				Token: tokens,
   818  				End:   end,
   819  			})
   820  			return
   821  
   822  		case tok.kind == '(':
   823  			if next := in.peek(); next.isEOL() {
   824  				// Start of block: no more tokens on this line.
   825  				in.file.Stmt = append(in.file.Stmt, in.parseLineBlock(start, tokens, tok))
   826  				return
   827  			} else if next == ')' {
   828  				rparen := in.lex()
   829  				if in.peek().isEOL() {
   830  					// Empty block.
   831  					in.lex()
   832  					in.file.Stmt = append(in.file.Stmt, &LineBlock{
   833  						Start:  start,
   834  						Token:  tokens,
   835  						LParen: LParen{Pos: tok.pos},
   836  						RParen: RParen{Pos: rparen.pos},
   837  					})
   838  					return
   839  				}
   840  				// '( )' in the middle of the line, not a block.
   841  				tokens = append(tokens, tok.text, rparen.text)
   842  			} else {
   843  				// '(' in the middle of the line, not a block.
   844  				tokens = append(tokens, tok.text)
   845  			}
   846  
   847  		default:
   848  			tokens = append(tokens, tok.text)
   849  			end = tok.endPos
   850  		}
   851  	}
   852  }
   853  
   854  func (in *input) parseLineBlock(start Position, token []string, lparen token) *LineBlock {
   855  	x := &LineBlock{
   856  		Start:  start,
   857  		Token:  token,
   858  		LParen: LParen{Pos: lparen.pos},
   859  	}
   860  	var comments []Comment
   861  	for {
   862  		switch in.peek() {
   863  		case _EOLCOMMENT:
   864  			// Suffix comment, will be attached later by assignComments.
   865  			in.lex()
   866  		case '\n':
   867  			// Blank line. Add an empty comment to preserve it.
   868  			in.lex()
   869  			if len(comments) == 0 && len(x.Line) > 0 || len(comments) > 0 && comments[len(comments)-1].Token != "" {
   870  				comments = append(comments, Comment{})
   871  			}
   872  		case _COMMENT:
   873  			tok := in.lex()
   874  			comments = append(comments, Comment{Start: tok.pos, Token: tok.text})
   875  		case _EOF:
   876  			in.Error(fmt.Sprintf("syntax error (unterminated block started at %s:%d:%d)", in.filename, x.Start.Line, x.Start.LineRune))
   877  		case ')':
   878  			rparen := in.lex()
   879  			x.RParen.Before = comments
   880  			x.RParen.Pos = rparen.pos
   881  			if !in.peek().isEOL() {
   882  				in.Error("syntax error (expected newline after closing paren)")
   883  			}
   884  			in.lex()
   885  			return x
   886  		default:
   887  			l := in.parseLine()
   888  			x.Line = append(x.Line, l)
   889  			l.Comment().Before = comments
   890  			comments = nil
   891  		}
   892  	}
   893  }
   894  
   895  func (in *input) parseLine() *Line {
   896  	tok := in.lex()
   897  	if tok.kind.isEOL() {
   898  		in.Error("internal parse error: parseLine at end of line")
   899  	}
   900  	start := tok.pos
   901  	end := tok.endPos
   902  	tokens := []string{tok.text}
   903  	for {
   904  		tok := in.lex()
   905  		if tok.kind.isEOL() {
   906  			return &Line{
   907  				Start:   start,
   908  				Token:   tokens,
   909  				End:     end,
   910  				InBlock: true,
   911  			}
   912  		}
   913  		tokens = append(tokens, tok.text)
   914  		end = tok.endPos
   915  	}
   916  }
   917  
   918  var (
   919  	slashSlash = []byte("//")
   920  	moduleStr  = []byte("module")
   921  )
   922  
   923  // ModulePath returns the module path from the gomod file text.
   924  // If it cannot find a module path, it returns an empty string.
   925  // It is tolerant of unrelated problems in the go.mod file.
   926  func ModulePath(mod []byte) string {
   927  	for len(mod) > 0 {
   928  		line := mod
   929  		mod = nil
   930  		if i := bytes.IndexByte(line, '\n'); i >= 0 {
   931  			line, mod = line[:i], line[i+1:]
   932  		}
   933  		if i := bytes.Index(line, slashSlash); i >= 0 {
   934  			line = line[:i]
   935  		}
   936  		line = bytes.TrimSpace(line)
   937  		if !bytes.HasPrefix(line, moduleStr) {
   938  			continue
   939  		}
   940  		line = line[len(moduleStr):]
   941  		n := len(line)
   942  		line = bytes.TrimSpace(line)
   943  		if len(line) == n || len(line) == 0 {
   944  			continue
   945  		}
   946  
   947  		if line[0] == '"' || line[0] == '`' {
   948  			p, err := strconv.Unquote(string(line))
   949  			if err != nil {
   950  				return "" // malformed quoted string or multiline module path
   951  			}
   952  			return p
   953  		}
   954  
   955  		return string(line)
   956  	}
   957  	return "" // missing module path
   958  }
   959
View as plain text