...

Source file src/github.com/yuin/goldmark/text/reader.go

Documentation: github.com/yuin/goldmark/text

     1  package text
     2  
     3  import (
     4  	"io"
     5  	"regexp"
     6  	"unicode/utf8"
     7  
     8  	"github.com/yuin/goldmark/util"
     9  )
    10  
// invalidValue is the sentinel offset for an unset segment boundary.
// blockReader.SetPosition treats a segment whose Start equals invalidValue as
// a request to load the segment stored for the given line.
const invalidValue = -1

// EOF indicates the end of file. Peek returns it when the current position
// lies outside the readable range.
const EOF = byte(0xff)
    15  
// A Reader interface provides abstracted methods for reading text.
type Reader interface {
	io.RuneReader

	// Source returns the source of the reader.
	Source() []byte

	// ResetPosition resets positions.
	ResetPosition()

	// Peek returns the byte at the current position without advancing the
	// internal pointer.
	Peek() byte

	// PeekLine returns the current line without advancing the internal pointer.
	PeekLine() ([]byte, Segment)

	// PrecendingCharacter returns the character just before the current
	// internal pointer.
	PrecendingCharacter() rune

	// Value returns the value of the given segment.
	Value(Segment) []byte

	// LineOffset returns the distance from the line head to the current
	// position.
	LineOffset() int

	// Position returns the current line number and position.
	Position() (int, Segment)

	// SetPosition sets the current line number and position.
	SetPosition(int, Segment)

	// SetPadding sets padding to the reader.
	SetPadding(int)

	// Advance advances the internal pointer.
	Advance(int)

	// AdvanceAndSetPadding advances the internal pointer and adds padding to
	// the reader.
	AdvanceAndSetPadding(int, int)

	// AdvanceLine advances the internal pointer to the next line head.
	AdvanceLine()

	// SkipSpaces skips space characters and returns a non-blank line together
	// with the number of characters skipped.
	// If it reaches EOF, returns false.
	SkipSpaces() (Segment, int, bool)

	// SkipBlankLines skips blank lines and returns a non-blank line together
	// with the number of lines skipped.
	// If it reaches EOF, returns false.
	SkipBlankLines() (Segment, int, bool)

	// Match performs regular expression matching against the text at the
	// current position.
	Match(reg *regexp.Regexp) bool

	// FindSubMatch performs regular expression searching against the text at
	// the current position and returns the submatches.
	FindSubMatch(reg *regexp.Regexp) [][]byte

	// FindClosure finds the corresponding closure.
	FindClosure(opener, closer byte, options FindClosureOptions) (*Segments, bool)
}
    77  
// FindClosureOptions holds the options for Reader.FindClosure.
type FindClosureOptions struct {
	// CodeSpan is a flag for the FindClosure. If this is set to true,
	// FindClosure ignores closers in codespans.
	CodeSpan bool

	// Nesting is a flag for the FindClosure. If this is set to true,
	// FindClosure allows nesting. If it is false, the search is abandoned as
	// soon as another opener is encountered.
	Nesting bool

	// Newline is a flag for the FindClosure. If this is set to true,
	// FindClosure searches for a closer over multiple lines.
	Newline bool

	// Advance is a flag for the FindClosure. If this is set to true,
	// FindClosure advances pointers when closer is found. If it is false,
	// the reader's position is restored before returning.
	Advance bool
}
    96  
// reader is the Reader implementation over a contiguous byte slice.
type reader struct {
	// source is the text being read.
	source []byte
	// sourceLength caches len(source); it is set by NewReader.
	sourceLength int
	// line is the current line number; ResetPosition sets it to -1 and each
	// AdvanceLine increments it.
	line int
	// peekedLine caches the bytes returned by PeekLine; nil means not cached.
	peekedLine []byte
	// pos is the current position: Start is the read pointer and Stop is the
	// end of the current line as computed by AdvanceLine.
	pos Segment
	// head is the offset at which the current line began (set by AdvanceLine).
	head int
	// lineOffset caches the result of LineOffset; a negative value means it
	// has to be recomputed.
	lineOffset int
}
   106  
   107  // NewReader return a new Reader that can read UTF-8 bytes .
   108  func NewReader(source []byte) Reader {
   109  	r := &reader{
   110  		source:       source,
   111  		sourceLength: len(source),
   112  	}
   113  	r.ResetPosition()
   114  	return r
   115  }
   116  
// FindClosure implements Reader.FindClosure by delegating to
// findClosureReader.
func (r *reader) FindClosure(opener, closer byte, options FindClosureOptions) (*Segments, bool) {
	return findClosureReader(r, opener, closer, options)
}
   120  
   121  func (r *reader) ResetPosition() {
   122  	r.line = -1
   123  	r.head = 0
   124  	r.lineOffset = -1
   125  	r.AdvanceLine()
   126  }
   127  
// Source returns the byte slice the reader was created with. The slice is
// returned as is, not copied.
func (r *reader) Source() []byte {
	return r.source
}
   131  
// Value returns the value of seg evaluated against the reader's source, as
// computed by Segment.Value.
func (r *reader) Value(seg Segment) []byte {
	return seg.Value(r.source)
}
   135  
   136  func (r *reader) Peek() byte {
   137  	if r.pos.Start >= 0 && r.pos.Start < r.sourceLength {
   138  		if r.pos.Padding != 0 {
   139  			return space[0]
   140  		}
   141  		return r.source[r.pos.Start]
   142  	}
   143  	return EOF
   144  }
   145  
   146  func (r *reader) PeekLine() ([]byte, Segment) {
   147  	if r.pos.Start >= 0 && r.pos.Start < r.sourceLength {
   148  		if r.peekedLine == nil {
   149  			r.peekedLine = r.pos.Value(r.Source())
   150  		}
   151  		return r.peekedLine, r.pos
   152  	}
   153  	return nil, r.pos
   154  }
   155  
// ReadRune implements the io.RuneReader interface by delegating to
// readRuneReader.
func (r *reader) ReadRune() (rune, int, error) {
	return readRuneReader(r)
}
   160  
   161  func (r *reader) LineOffset() int {
   162  	if r.lineOffset < 0 {
   163  		v := 0
   164  		for i := r.head; i < r.pos.Start; i++ {
   165  			if r.source[i] == '\t' {
   166  				v += util.TabWidth(v)
   167  			} else {
   168  				v++
   169  			}
   170  		}
   171  		r.lineOffset = v - r.pos.Padding
   172  	}
   173  	return r.lineOffset
   174  }
   175  
   176  func (r *reader) PrecendingCharacter() rune {
   177  	if r.pos.Start <= 0 {
   178  		if r.pos.Padding != 0 {
   179  			return rune(' ')
   180  		}
   181  		return rune('\n')
   182  	}
   183  	i := r.pos.Start - 1
   184  	for ; i >= 0; i-- {
   185  		if utf8.RuneStart(r.source[i]) {
   186  			break
   187  		}
   188  	}
   189  	rn, _ := utf8.DecodeRune(r.source[i:])
   190  	return rn
   191  }
   192  
   193  func (r *reader) Advance(n int) {
   194  	r.lineOffset = -1
   195  	if n < len(r.peekedLine) && r.pos.Padding == 0 {
   196  		r.pos.Start += n
   197  		r.peekedLine = nil
   198  		return
   199  	}
   200  	r.peekedLine = nil
   201  	l := r.sourceLength
   202  	for ; n > 0 && r.pos.Start < l; n-- {
   203  		if r.pos.Padding != 0 {
   204  			r.pos.Padding--
   205  			continue
   206  		}
   207  		if r.source[r.pos.Start] == '\n' {
   208  			r.AdvanceLine()
   209  			continue
   210  		}
   211  		r.pos.Start++
   212  	}
   213  }
   214  
   215  func (r *reader) AdvanceAndSetPadding(n, padding int) {
   216  	r.Advance(n)
   217  	if padding > r.pos.Padding {
   218  		r.SetPadding(padding)
   219  	}
   220  }
   221  
   222  func (r *reader) AdvanceLine() {
   223  	r.lineOffset = -1
   224  	r.peekedLine = nil
   225  	r.pos.Start = r.pos.Stop
   226  	r.head = r.pos.Start
   227  	if r.pos.Start < 0 {
   228  		return
   229  	}
   230  	r.pos.Stop = r.sourceLength
   231  	for i := r.pos.Start; i < r.sourceLength; i++ {
   232  		c := r.source[i]
   233  		if c == '\n' {
   234  			r.pos.Stop = i + 1
   235  			break
   236  		}
   237  	}
   238  	r.line++
   239  	r.pos.Padding = 0
   240  }
   241  
// Position returns the current line number and the current segment.
func (r *reader) Position() (int, Segment) {
	return r.line, r.pos
}
   245  
   246  func (r *reader) SetPosition(line int, pos Segment) {
   247  	r.lineOffset = -1
   248  	r.line = line
   249  	r.pos = pos
   250  }
   251  
   252  func (r *reader) SetPadding(v int) {
   253  	r.pos.Padding = v
   254  }
   255  
// SkipSpaces implements Reader.SkipSpaces by delegating to skipSpacesReader.
func (r *reader) SkipSpaces() (Segment, int, bool) {
	return skipSpacesReader(r)
}
   259  
// SkipBlankLines implements Reader.SkipBlankLines by delegating to
// skipBlankLinesReader.
func (r *reader) SkipBlankLines() (Segment, int, bool) {
	return skipBlankLinesReader(r)
}
   263  
// Match implements Reader.Match by delegating to matchReader.
func (r *reader) Match(reg *regexp.Regexp) bool {
	return matchReader(r, reg)
}
   267  
// FindSubMatch implements Reader.FindSubMatch by delegating to
// findSubMatchReader.
func (r *reader) FindSubMatch(reg *regexp.Regexp) [][]byte {
	return findSubMatchReader(r, reg)
}
   271  
// A BlockReader interface is a reader that is optimized for Blocks.
// It reads the source through a list of segments instead of as one
// contiguous text.
type BlockReader interface {
	Reader
	// Reset resets the current state and sets new segments to the reader.
	Reset(segment *Segments)
}
   278  
// blockReader is the BlockReader implementation; each segment is treated as
// one line of the block.
type blockReader struct {
	// source is the underlying text the segments refer to.
	source []byte
	// segments is the list of lines to read; it is replaced by Reset.
	segments *Segments
	// segmentsLength caches segments.Len(); it is set by Reset.
	segmentsLength int
	// line is the index of the current segment.
	line int
	// pos is the current position within the current segment.
	pos Segment
	// head is the Start of the current line's segment.
	head int
	// last is the Stop of the final segment (0 when there are no segments).
	last int
	// lineOffset caches the result of LineOffset; a negative value means it
	// has to be recomputed.
	lineOffset int
}
   289  
   290  // NewBlockReader returns a new BlockReader.
   291  func NewBlockReader(source []byte, segments *Segments) BlockReader {
   292  	r := &blockReader{
   293  		source: source,
   294  	}
   295  	if segments != nil {
   296  		r.Reset(segments)
   297  	}
   298  	return r
   299  }
   300  
// FindClosure implements Reader.FindClosure by delegating to
// findClosureReader.
func (r *blockReader) FindClosure(opener, closer byte, options FindClosureOptions) (*Segments, bool) {
	return findClosureReader(r, opener, closer, options)
}
   304  
   305  func (r *blockReader) ResetPosition() {
   306  	r.line = -1
   307  	r.head = 0
   308  	r.last = 0
   309  	r.lineOffset = -1
   310  	r.pos.Start = -1
   311  	r.pos.Stop = -1
   312  	r.pos.Padding = 0
   313  	if r.segmentsLength > 0 {
   314  		last := r.segments.At(r.segmentsLength - 1)
   315  		r.last = last.Stop
   316  	}
   317  	r.AdvanceLine()
   318  }
   319  
// Reset replaces the segments the reader iterates over, caches their count
// and rewinds to the first one. segments must not be nil.
func (r *blockReader) Reset(segments *Segments) {
	r.segments = segments
	r.segmentsLength = segments.Len()
	r.ResetPosition()
}
   325  
// Source returns the underlying byte slice the segments refer to. The slice
// is returned as is, not copied.
func (r *blockReader) Source() []byte {
	return r.source
}
   329  
// Value returns the bytes covered by seg, assembled line by line from the
// reader's segments. The result is a newly allocated slice.
func (r *blockReader) Value(seg Segment) []byte {
	line := r.segmentsLength - 1
	ret := make([]byte, 0, seg.Stop-seg.Start+1)
	// Walk backwards to the last segment that starts at or before seg.Start.
	for ; line >= 0; line-- {
		if seg.Start >= r.segments.At(line).Start {
			break
		}
	}
	i := seg.Start
	for ; line < r.segmentsLength; line++ {
		s := r.segments.At(line)
		// A negative i marks every line after the first one: copying restarts
		// at that line's own Start.
		if i < 0 {
			i = s.Start
		}
		// NOTE(review): presumably prepends this line's padding to the
		// output; confirm against Segment.ConcatPadding.
		ret = s.ConcatPadding(ret)
		// Copy source bytes up to the end of the request or of this line,
		// whichever comes first.
		for ; i < seg.Stop && i < s.Stop; i++ {
			ret = append(ret, r.source[i])
		}
		i = -1
		// Stop once a line extends beyond the requested range.
		if s.Stop > seg.Stop {
			break
		}
	}
	return ret
}
   355  
// ReadRune implements the io.RuneReader interface by delegating to
// readRuneReader.
func (r *blockReader) ReadRune() (rune, int, error) {
	return readRuneReader(r)
}
   360  
   361  func (r *blockReader) PrecendingCharacter() rune {
   362  	if r.pos.Padding != 0 {
   363  		return rune(' ')
   364  	}
   365  	if r.segments.Len() < 1 {
   366  		return rune('\n')
   367  	}
   368  	firstSegment := r.segments.At(0)
   369  	if r.line == 0 && r.pos.Start <= firstSegment.Start {
   370  		return rune('\n')
   371  	}
   372  	l := len(r.source)
   373  	i := r.pos.Start - 1
   374  	for ; i < l && i >= 0; i-- {
   375  		if utf8.RuneStart(r.source[i]) {
   376  			break
   377  		}
   378  	}
   379  	if i < 0 || i >= l {
   380  		return rune('\n')
   381  	}
   382  	rn, _ := utf8.DecodeRune(r.source[i:])
   383  	return rn
   384  }
   385  
   386  func (r *blockReader) LineOffset() int {
   387  	if r.lineOffset < 0 {
   388  		v := 0
   389  		for i := r.head; i < r.pos.Start; i++ {
   390  			if r.source[i] == '\t' {
   391  				v += util.TabWidth(v)
   392  			} else {
   393  				v++
   394  			}
   395  		}
   396  		r.lineOffset = v - r.pos.Padding
   397  	}
   398  	return r.lineOffset
   399  }
   400  
   401  func (r *blockReader) Peek() byte {
   402  	if r.line < r.segmentsLength && r.pos.Start >= 0 && r.pos.Start < r.last {
   403  		if r.pos.Padding != 0 {
   404  			return space[0]
   405  		}
   406  		return r.source[r.pos.Start]
   407  	}
   408  	return EOF
   409  }
   410  
   411  func (r *blockReader) PeekLine() ([]byte, Segment) {
   412  	if r.line < r.segmentsLength && r.pos.Start >= 0 && r.pos.Start < r.last {
   413  		return r.pos.Value(r.source), r.pos
   414  	}
   415  	return nil, r.pos
   416  }
   417  
   418  func (r *blockReader) Advance(n int) {
   419  	r.lineOffset = -1
   420  
   421  	if n < r.pos.Stop-r.pos.Start && r.pos.Padding == 0 {
   422  		r.pos.Start += n
   423  		return
   424  	}
   425  
   426  	for ; n > 0; n-- {
   427  		if r.pos.Padding != 0 {
   428  			r.pos.Padding--
   429  			continue
   430  		}
   431  		if r.pos.Start >= r.pos.Stop-1 && r.pos.Stop < r.last {
   432  			r.AdvanceLine()
   433  			continue
   434  		}
   435  		r.pos.Start++
   436  	}
   437  }
   438  
   439  func (r *blockReader) AdvanceAndSetPadding(n, padding int) {
   440  	r.Advance(n)
   441  	if padding > r.pos.Padding {
   442  		r.SetPadding(padding)
   443  	}
   444  }
   445  
// AdvanceLine moves the reader to the next line. Passing a segment built
// from invalidValue makes SetPosition load the segment stored for that line,
// if there is one; head is then set to the resulting start offset.
func (r *blockReader) AdvanceLine() {
	r.SetPosition(r.line+1, NewSegment(invalidValue, invalidValue))
	r.head = r.pos.Start
}
   450  
// Position returns the current line number and the current segment.
func (r *blockReader) Position() (int, Segment) {
	return r.line, r.pos
}
   454  
   455  func (r *blockReader) SetPosition(line int, pos Segment) {
   456  	r.lineOffset = -1
   457  	r.line = line
   458  	if pos.Start == invalidValue {
   459  		if r.line < r.segmentsLength {
   460  			s := r.segments.At(line)
   461  			r.head = s.Start
   462  			r.pos = s
   463  		}
   464  	} else {
   465  		r.pos = pos
   466  		if r.line < r.segmentsLength {
   467  			s := r.segments.At(line)
   468  			r.head = s.Start
   469  		}
   470  	}
   471  }
   472  
// SetPadding sets the padding of the current position and invalidates the
// cached line offset, which depends on it.
func (r *blockReader) SetPadding(v int) {
	r.lineOffset = -1
	r.pos.Padding = v
}
   477  
// SkipSpaces implements Reader.SkipSpaces by delegating to skipSpacesReader.
func (r *blockReader) SkipSpaces() (Segment, int, bool) {
	return skipSpacesReader(r)
}
   481  
// SkipBlankLines implements Reader.SkipBlankLines by delegating to
// skipBlankLinesReader.
func (r *blockReader) SkipBlankLines() (Segment, int, bool) {
	return skipBlankLinesReader(r)
}
   485  
// Match implements Reader.Match by delegating to matchReader.
func (r *blockReader) Match(reg *regexp.Regexp) bool {
	return matchReader(r, reg)
}
   489  
// FindSubMatch implements Reader.FindSubMatch by delegating to
// findSubMatchReader.
func (r *blockReader) FindSubMatch(reg *regexp.Regexp) [][]byte {
	return findSubMatchReader(r, reg)
}
   493  
   494  func skipBlankLinesReader(r Reader) (Segment, int, bool) {
   495  	lines := 0
   496  	for {
   497  		line, seg := r.PeekLine()
   498  		if line == nil {
   499  			return seg, lines, false
   500  		}
   501  		if util.IsBlank(line) {
   502  			lines++
   503  			r.AdvanceLine()
   504  		} else {
   505  			return seg, lines, true
   506  		}
   507  	}
   508  }
   509  
   510  func skipSpacesReader(r Reader) (Segment, int, bool) {
   511  	chars := 0
   512  	for {
   513  		line, segment := r.PeekLine()
   514  		if line == nil {
   515  			return segment, chars, false
   516  		}
   517  		for i, c := range line {
   518  			if util.IsSpace(c) {
   519  				chars++
   520  				r.Advance(1)
   521  				continue
   522  			}
   523  			return segment.WithStart(segment.Start + i + 1), chars, true
   524  		}
   525  	}
   526  }
   527  
   528  func matchReader(r Reader, reg *regexp.Regexp) bool {
   529  	oldline, oldseg := r.Position()
   530  	match := reg.FindReaderSubmatchIndex(r)
   531  	r.SetPosition(oldline, oldseg)
   532  	if match == nil {
   533  		return false
   534  	}
   535  	r.Advance(match[1] - match[0])
   536  	return true
   537  }
   538  
   539  func findSubMatchReader(r Reader, reg *regexp.Regexp) [][]byte {
   540  	oldline, oldseg := r.Position()
   541  	match := reg.FindReaderSubmatchIndex(r)
   542  	r.SetPosition(oldline, oldseg)
   543  	if match == nil {
   544  		return nil
   545  	}
   546  	runes := make([]rune, 0, match[1]-match[0])
   547  	for i := 0; i < match[1]; {
   548  		r, size, _ := readRuneReader(r)
   549  		i += size
   550  		runes = append(runes, r)
   551  	}
   552  	result := [][]byte{}
   553  	for i := 0; i < len(match); i += 2 {
   554  		result = append(result, []byte(string(runes[match[i]:match[i+1]])))
   555  	}
   556  
   557  	r.SetPosition(oldline, oldseg)
   558  	r.Advance(match[1] - match[0])
   559  	return result
   560  }
   561  
   562  func readRuneReader(r Reader) (rune, int, error) {
   563  	line, _ := r.PeekLine()
   564  	if line == nil {
   565  		return 0, 0, io.EOF
   566  	}
   567  	rn, size := utf8.DecodeRune(line)
   568  	if rn == utf8.RuneError {
   569  		return 0, 0, io.EOF
   570  	}
   571  	r.Advance(size)
   572  	return rn, size, nil
   573  }
   574  
// findClosureReader searches from r's current position for the closer byte
// that balances an opener which has already been consumed by the caller
// (the nesting depth starts at 1).
//
// On success it returns the segments lying between the starting position
// and the closer (the closer itself is excluded) and true; otherwise it
// returns nil and false. The search is abandoned when the input ends, when
// the end of the line is reached and opts.Newline is false, or when another
// opener is found and opts.Nesting is false.
//
// Unless opts.Advance is set, the reader's position is restored before
// returning, whether or not a closer was found.
func findClosureReader(r Reader, opener, closer byte, opts FindClosureOptions) (*Segments, bool) {
	// opened is the current nesting depth.
	opened := 1
	// codeSpanOpener is the length of the backtick run that opened the code
	// span being scanned; 0 means the scan is not inside a code span.
	codeSpanOpener := 0
	closed := false
	orgline, orgpos := r.Position()
	var ret *Segments

	for {
		bs, seg := r.PeekLine()
		if bs == nil {
			goto end
		}
		i := 0
		for i < len(bs) {
			c := bs[i]
			if opts.CodeSpan && codeSpanOpener != 0 && c == '`' {
				// Inside a code span: count this backtick run; the span is
				// closed only by a run of the same length as its opener.
				codeSpanCloser := 0
				for ; i < len(bs); i++ {
					if bs[i] == '`' {
						codeSpanCloser++
					} else {
						i--
						break
					}
				}
				if codeSpanCloser == codeSpanOpener {
					codeSpanOpener = 0
				}
			} else if codeSpanOpener == 0 && c == '\\' && i < len(bs)-1 && util.IsPunct(bs[i+1]) {
				// Backslash-escaped punctuation: skip both bytes.
				i += 2
				continue
			} else if opts.CodeSpan && codeSpanOpener == 0 && c == '`' {
				// A backtick run outside a code span opens one.
				for ; i < len(bs); i++ {
					if bs[i] == '`' {
						codeSpanOpener++
					} else {
						i--
						break
					}
				}
			} else if (opts.CodeSpan && codeSpanOpener == 0) || !opts.CodeSpan {
				// Openers and closers only count outside code spans.
				if c == closer {
					opened--
					if opened == 0 {
						if ret == nil {
							ret = NewSegments()
						}
						// Record the part of this line before the closer and
						// step past the closer.
						ret.Append(seg.WithStop(seg.Start + i))
						r.Advance(i + 1)
						closed = true
						goto end
					}
				} else if c == opener {
					if !opts.Nesting {
						goto end
					}
					opened++
				}
			}
			i++
		}
		if !opts.Newline {
			goto end
		}
		// No closer on this line: record the whole line and continue with
		// the next one.
		r.AdvanceLine()
		if ret == nil {
			ret = NewSegments()
		}
		ret.Append(seg)
	}
end:
	if !opts.Advance {
		r.SetPosition(orgline, orgpos)
	}
	if closed {
		return ret, true
	}
	return nil, false
}
   654  

View as plain text