...

Source file src/github.com/yuin/goldmark/parser/parser.go

Documentation: github.com/yuin/goldmark/parser

     1  // Package parser contains stuff that are related to parsing a Markdown text.
     2  package parser
     3  
     4  import (
     5  	"fmt"
     6  	"strings"
     7  	"sync"
     8  
     9  	"github.com/yuin/goldmark/ast"
    10  	"github.com/yuin/goldmark/text"
    11  	"github.com/yuin/goldmark/util"
    12  )
    13  
// A Reference interface represents a link reference in Markdown text.
type Reference interface {
	// String implements Stringer.
	String() string

	// Label returns the label of the reference.
	Label() []byte

	// Destination returns the destination (URL) of the reference.
	Destination() []byte

	// Title returns the title of the reference.
	Title() []byte
}
    28  
// reference is the Reference implementation returned by NewReference. It
// keeps the label, destination and title byte slices it was created with.
type reference struct {
	label       []byte
	destination []byte
	title       []byte
}
    34  
    35  // NewReference returns a new Reference.
    36  func NewReference(label, destination, title []byte) Reference {
    37  	return &reference{label, destination, title}
    38  }
    39  
// Label returns the stored label slice.
func (r *reference) Label() []byte {
	return r.label
}
    43  
// Destination returns the stored destination slice.
func (r *reference) Destination() []byte {
	return r.destination
}
    47  
// Title returns the stored title slice.
func (r *reference) Title() []byte {
	return r.title
}
    51  
// String implements Stringer. It renders the label, destination and title as
// text in the form "Reference{Label:..., Destination:..., Title:...}".
func (r *reference) String() string {
	return fmt.Sprintf("Reference{Label:%s, Destination:%s, Title:%s}", r.label, r.destination, r.title)
}
    55  
// An IDs interface is a collection of element ids.
type IDs interface {
	// Generate generates a new element id from the given value for a node
	// of the given kind.
	Generate(value []byte, kind ast.NodeKind) []byte

	// Put adds the given element id to the table of used ids.
	Put(value []byte)
}
    64  
// ids is the default IDs implementation. values is the set of ids already
// in use; an id counts as used when it is present as a key.
type ids struct {
	values map[string]bool
}
    68  
    69  func newIDs() IDs {
    70  	return &ids{
    71  		values: map[string]bool{},
    72  	}
    73  }
    74  
    75  func (s *ids) Generate(value []byte, kind ast.NodeKind) []byte {
    76  	value = util.TrimLeftSpace(value)
    77  	value = util.TrimRightSpace(value)
    78  	result := []byte{}
    79  	for i := 0; i < len(value); {
    80  		v := value[i]
    81  		l := util.UTF8Len(v)
    82  		i += int(l)
    83  		if l != 1 {
    84  			continue
    85  		}
    86  		if util.IsAlphaNumeric(v) {
    87  			if 'A' <= v && v <= 'Z' {
    88  				v += 'a' - 'A'
    89  			}
    90  			result = append(result, v)
    91  		} else if util.IsSpace(v) || v == '-' || v == '_' {
    92  			result = append(result, '-')
    93  		}
    94  	}
    95  	if len(result) == 0 {
    96  		if kind == ast.KindHeading {
    97  			result = []byte("heading")
    98  		} else {
    99  			result = []byte("id")
   100  		}
   101  	}
   102  	if _, ok := s.values[util.BytesToReadOnlyString(result)]; !ok {
   103  		s.values[util.BytesToReadOnlyString(result)] = true
   104  		return result
   105  	}
   106  	for i := 1; ; i++ {
   107  		newResult := fmt.Sprintf("%s-%d", result, i)
   108  		if _, ok := s.values[newResult]; !ok {
   109  			s.values[newResult] = true
   110  			return []byte(newResult)
   111  		}
   112  
   113  	}
   114  }
   115  
   116  func (s *ids) Put(value []byte) {
   117  	s.values[util.BytesToReadOnlyString(value)] = true
   118  }
   119  
// ContextKey is a key that is used to store arbitrary values in a Context.
type ContextKey int
   122  
// ContextKeyMax is the largest ContextKey value handed out so far by
// NewContextKey.
var ContextKeyMax ContextKey
   125  
// NewContextKey returns a new ContextKey value.
// It increments the package-level ContextKeyMax and returns the new value.
// NOTE(review): the increment is not synchronized here — presumably keys are
// meant to be allocated during package initialization; confirm with callers.
func NewContextKey() ContextKey {
	ContextKeyMax++
	return ContextKeyMax
}
   131  
// A Context interface holds the information that is necessary to parse
// Markdown text.
type Context interface {
	// String implements Stringer.
	String() string

	// Get returns a value associated with the given key.
	Get(ContextKey) interface{}

	// ComputeIfAbsent computes a value if no value is associated with the
	// given key, stores it, and returns the value.
	ComputeIfAbsent(ContextKey, func() interface{}) interface{}

	// Set sets the given value to the context.
	Set(ContextKey, interface{})

	// AddReference adds the given reference to this context.
	AddReference(Reference)

	// Reference returns (a reference, true) if a reference associated with
	// the given label exists, otherwise (nil, false).
	Reference(label string) (Reference, bool)

	// References returns a list of references.
	References() []Reference

	// IDs returns a collection of the element ids.
	IDs() IDs

	// BlockOffset returns the position of the first non-space character on
	// the current line.
	// This value is valid only for BlockParser.Open.
	// BlockOffset returns -1 if the current line is blank.
	BlockOffset() int

	// SetBlockOffset sets the position of the first non-space character on
	// the current line.
	// This value is valid only for BlockParser.Open.
	SetBlockOffset(int)

	// BlockIndent returns the indent width of the current line.
	// This value is valid only for BlockParser.Open.
	// BlockIndent returns -1 if the current line is blank.
	BlockIndent() int

	// SetBlockIndent sets the indent width of the current line.
	// This value is valid only for BlockParser.Open.
	SetBlockIndent(int)

	// FirstDelimiter returns the first delimiter of the current delimiter list.
	FirstDelimiter() *Delimiter

	// LastDelimiter returns the last delimiter of the current delimiter list.
	LastDelimiter() *Delimiter

	// PushDelimiter appends the given delimiter to the tail of the current
	// delimiter list.
	PushDelimiter(delimiter *Delimiter)

	// RemoveDelimiter removes the given delimiter from the current delimiter list.
	RemoveDelimiter(d *Delimiter)

	// ClearDelimiters clears the current delimiter list.
	ClearDelimiters(bottom ast.Node)

	// OpenedBlocks returns a list of nodes that are currently being parsed.
	OpenedBlocks() []Block

	// SetOpenedBlocks sets a list of nodes that are currently being parsed.
	SetOpenedBlocks([]Block)

	// LastOpenedBlock returns the last node that is currently being parsed.
	LastOpenedBlock() Block

	// IsInLinkLabel returns true if the current position seems to be in a
	// link label.
	IsInLinkLabel() bool
}
   206  
// A ContextConfig struct is a data structure that holds the configuration of
// the Context.
type ContextConfig struct {
	// IDs is the id collection the new Context will use.
	IDs IDs
}
   211  
// A ContextOption is a functional option type for the Context.
type ContextOption func(*ContextConfig)
   214  
// WithIDs is a functional option for the Context that replaces the id
// collection used by the Context with the given one.
func WithIDs(ids IDs) ContextOption {
	return func(c *ContextConfig) {
		c.IDs = ids
	}
}
   221  
// parseContext is the Context implementation returned by NewContext.
type parseContext struct {
	// store holds arbitrary values, indexed by ContextKey.
	store []interface{}
	// ids is the collection of element ids.
	ids IDs
	// refs maps a key derived from the label (see AddReference) to its reference.
	refs map[string]Reference
	// blockOffset and blockIndent describe the current line; both start at -1.
	blockOffset int
	blockIndent int
	// delimiters is the head and lastDelimiter the tail of the delimiter list.
	delimiters    *Delimiter
	lastDelimiter *Delimiter
	// openedBlocks is the list of blocks that are currently being parsed.
	openedBlocks []Block
}
   232  
   233  // NewContext returns a new Context.
   234  func NewContext(options ...ContextOption) Context {
   235  	cfg := &ContextConfig{
   236  		IDs: newIDs(),
   237  	}
   238  	for _, option := range options {
   239  		option(cfg)
   240  	}
   241  
   242  	return &parseContext{
   243  		store:         make([]interface{}, ContextKeyMax+1),
   244  		refs:          map[string]Reference{},
   245  		ids:           cfg.IDs,
   246  		blockOffset:   -1,
   247  		blockIndent:   -1,
   248  		delimiters:    nil,
   249  		lastDelimiter: nil,
   250  		openedBlocks:  []Block{},
   251  	}
   252  }
   253  
   254  func (p *parseContext) Get(key ContextKey) interface{} {
   255  	return p.store[key]
   256  }
   257  
   258  func (p *parseContext) ComputeIfAbsent(key ContextKey, f func() interface{}) interface{} {
   259  	v := p.store[key]
   260  	if v == nil {
   261  		v = f()
   262  		p.store[key] = v
   263  	}
   264  	return v
   265  }
   266  
   267  func (p *parseContext) Set(key ContextKey, value interface{}) {
   268  	p.store[key] = value
   269  }
   270  
// IDs returns the id collection this context was created with.
func (p *parseContext) IDs() IDs {
	return p.ids
}
   274  
// BlockOffset returns the value last passed to SetBlockOffset, or -1 if it
// has not been set since the context was created.
func (p *parseContext) BlockOffset() int {
	return p.blockOffset
}
   278  
// SetBlockOffset records v as the current block offset.
func (p *parseContext) SetBlockOffset(v int) {
	p.blockOffset = v
}
   282  
// BlockIndent returns the value last passed to SetBlockIndent, or -1 if it
// has not been set since the context was created.
func (p *parseContext) BlockIndent() int {
	return p.blockIndent
}
   286  
// SetBlockIndent records v as the current block indent.
func (p *parseContext) SetBlockIndent(v int) {
	p.blockIndent = v
}
   290  
// LastDelimiter returns the tail of the delimiter list, or nil if it is empty.
func (p *parseContext) LastDelimiter() *Delimiter {
	return p.lastDelimiter
}
   294  
// FirstDelimiter returns the head of the delimiter list, or nil if it is empty.
func (p *parseContext) FirstDelimiter() *Delimiter {
	return p.delimiters
}
   298  
   299  func (p *parseContext) PushDelimiter(d *Delimiter) {
   300  	if p.delimiters == nil {
   301  		p.delimiters = d
   302  		p.lastDelimiter = d
   303  	} else {
   304  		l := p.lastDelimiter
   305  		p.lastDelimiter = d
   306  		l.NextDelimiter = d
   307  		d.PreviousDelimiter = l
   308  	}
   309  }
   310  
// RemoveDelimiter unlinks d from the delimiter list of this context. After
// that it passes d to ast.MergeOrReplaceTextSegment when d.Length is non-zero,
// or removes d from its parent node otherwise.
func (p *parseContext) RemoveDelimiter(d *Delimiter) {
	if d.PreviousDelimiter == nil {
		// d has no predecessor: the list now starts at its successor.
		p.delimiters = d.NextDelimiter
	} else {
		// Bypass d in both directions.
		d.PreviousDelimiter.NextDelimiter = d.NextDelimiter
		if d.NextDelimiter != nil {
			d.NextDelimiter.PreviousDelimiter = d.PreviousDelimiter
		}
	}
	if d.NextDelimiter == nil {
		// d has no successor: its predecessor becomes the new tail.
		p.lastDelimiter = d.PreviousDelimiter
	}
	// Make sure the (possibly new) head and tail do not point outside the list.
	if p.delimiters != nil {
		p.delimiters.PreviousDelimiter = nil
	}
	if p.lastDelimiter != nil {
		p.lastDelimiter.NextDelimiter = nil
	}
	// Detach d itself from its former neighbours.
	d.NextDelimiter = nil
	d.PreviousDelimiter = nil
	if d.Length != 0 {
		// NOTE(review): presumably keeps the remaining delimiter characters
		// as ordinary text — confirm against ast.MergeOrReplaceTextSegment.
		ast.MergeOrReplaceTextSegment(d.Parent(), d, d.Segment)
	} else {
		d.Parent().RemoveChild(d.Parent(), d)
	}
}
   337  
   338  func (p *parseContext) ClearDelimiters(bottom ast.Node) {
   339  	if p.lastDelimiter == nil {
   340  		return
   341  	}
   342  	var c ast.Node
   343  	for c = p.lastDelimiter; c != nil && c != bottom; {
   344  		prev := c.PreviousSibling()
   345  		if d, ok := c.(*Delimiter); ok {
   346  			p.RemoveDelimiter(d)
   347  		}
   348  		c = prev
   349  	}
   350  }
   351  
   352  func (p *parseContext) AddReference(ref Reference) {
   353  	key := util.ToLinkReference(ref.Label())
   354  	if _, ok := p.refs[key]; !ok {
   355  		p.refs[key] = ref
   356  	}
   357  }
   358  
// Reference looks up label in the reference table as-is and returns the
// reference together with true, or (nil, false) when there is no entry.
func (p *parseContext) Reference(label string) (Reference, bool) {
	v, ok := p.refs[label]
	return v, ok
}
   363  
   364  func (p *parseContext) References() []Reference {
   365  	ret := make([]Reference, 0, len(p.refs))
   366  	for _, v := range p.refs {
   367  		ret = append(ret, v)
   368  	}
   369  	return ret
   370  }
   371  
   372  func (p *parseContext) String() string {
   373  	refs := []string{}
   374  	for _, r := range p.refs {
   375  		refs = append(refs, r.String())
   376  	}
   377  
   378  	return fmt.Sprintf("Context{Store:%#v, Refs:%s}", p.store, strings.Join(refs, ","))
   379  }
   380  
// OpenedBlocks returns the stored list of opened blocks (not a copy).
func (p *parseContext) OpenedBlocks() []Block {
	return p.openedBlocks
}
   384  
// SetOpenedBlocks replaces the stored list of opened blocks with v.
func (p *parseContext) SetOpenedBlocks(v []Block) {
	p.openedBlocks = v
}
   388  
   389  func (p *parseContext) LastOpenedBlock() Block {
   390  	if l := len(p.openedBlocks); l != 0 {
   391  		return p.openedBlocks[l-1]
   392  	}
   393  	return Block{}
   394  }
   395  
   396  func (p *parseContext) IsInLinkLabel() bool {
   397  	tlist := p.Get(linkLabelStateKey)
   398  	return tlist != nil
   399  }
   400  
// State represents the parser's state.
// State is designed to be used as a bit flag, so values can be combined
// with the | operator.
type State int
   404  
const (
	// none occupies the first bit (1 << 0); the exported flags below take
	// the following bits in declaration order.
	none State = 1 << iota

	// Continue indicates parser can continue parsing.
	Continue

	// Close indicates parser cannot parse anymore.
	Close

	// HasChildren indicates parser may have child blocks.
	HasChildren

	// NoChildren indicates parser does not have child blocks.
	NoChildren

	// RequireParagraph indicates parser requires that the last node
	// must be a paragraph and is not converted to other nodes by
	// ParagraphTransformers.
	RequireParagraph
)
   425  
// A Config struct is a data structure that holds the configuration of the Parser.
type Config struct {
	// Options holds arbitrary named options; they are passed to every
	// registered component that implements SetOptioner.
	Options map[OptionName]interface{}
	// The slices below hold the registered components with their priorities.
	BlockParsers          util.PrioritizedSlice /*<BlockParser>*/
	InlineParsers         util.PrioritizedSlice /*<InlineParser>*/
	ParagraphTransformers util.PrioritizedSlice /*<ParagraphTransformer>*/
	ASTTransformers       util.PrioritizedSlice /*<ASTTransformer>*/
	// EscapedSpace is enabled by WithEscapedSpace.
	EscapedSpace bool
}
   435  
   436  // NewConfig returns a new Config.
   437  func NewConfig() *Config {
   438  	return &Config{
   439  		Options:               map[OptionName]interface{}{},
   440  		BlockParsers:          util.PrioritizedSlice{},
   441  		InlineParsers:         util.PrioritizedSlice{},
   442  		ParagraphTransformers: util.PrioritizedSlice{},
   443  		ASTTransformers:       util.PrioritizedSlice{},
   444  	}
   445  }
   446  
// An Option interface is a functional option type for the Parser.
type Option interface {
	// SetParserOption applies this option to the given parser configuration.
	SetParserOption(*Config)
}
   451  
// OptionName is the name of a parser option.
type OptionName string
   454  
// optAttribute is an option name that specifies attributes of elements.
const optAttribute OptionName = "Attribute"
   457  
// withAttribute is the Option returned by WithAttribute.
type withAttribute struct {
}

// SetParserOption sets the optAttribute option to true.
func (o *withAttribute) SetParserOption(c *Config) {
	c.Options[optAttribute] = true
}

// WithAttribute is a functional option that enables custom attributes.
func WithAttribute() Option {
	return &withAttribute{}
}
   469  
// A Parser interface parses Markdown text into AST nodes.
type Parser interface {
	// Parse parses the given Markdown text into AST nodes.
	Parse(reader text.Reader, opts ...ParseOption) ast.Node

	// AddOptions adds the given options to this parser.
	AddOptions(...Option)
}
   478  
// A SetOptioner interface sets the given option to the object.
type SetOptioner interface {
	// SetOption sets the given option to the object.
	// Options the object does not understand may be passed as well,
	// so implementations must ignore them.
	SetOption(name OptionName, value interface{})
}
   486  
// A BlockParser interface parses a block level element like Paragraph, List,
// Blockquote etc.
type BlockParser interface {
	// Trigger returns a list of characters that trigger the Open method of
	// this parser.
	// If Trigger returns nil, Open will be called with any line.
	Trigger() []byte

	// Open parses the current line and returns a result of parsing.
	//
	// Open must not parse beyond the current line.
	// If Open has been able to parse the current line, Open must advance the
	// reader position by the consumed byte length.
	//
	// If Open has not been able to parse the current line, Open should return
	// (nil, NoChildren). If Open has been able to parse the current line, Open
	// should return a new Block node together with HasChildren or NoChildren.
	Open(parent ast.Node, reader text.Reader, pc Context) (ast.Node, State)

	// Continue parses the current line and returns a result of parsing.
	//
	// Continue must not parse beyond the current line.
	// If Continue has been able to parse the current line, Continue must advance
	// the reader position by the consumed byte length.
	//
	// If Continue has not been able to parse the current line, Continue should
	// return Close. If Continue has been able to parse the current line,
	// Continue should return (Continue | NoChildren) or
	// (Continue | HasChildren).
	Continue(node ast.Node, reader text.Reader, pc Context) State

	// Close will be called when the parser returns Close.
	Close(node ast.Node, reader text.Reader, pc Context)

	// CanInterruptParagraph returns true if the parser can interrupt paragraphs,
	// otherwise false.
	CanInterruptParagraph() bool

	// CanAcceptIndentedLine returns true if the parser can open a new node when
	// the given line is indented by more than 3 spaces.
	CanAcceptIndentedLine() bool
}
   529  
// An InlineParser interface parses an inline level element like CodeSpan, Link etc.
type InlineParser interface {
	// Trigger returns a list of characters that trigger the Parse method of
	// this parser.
	// Trigger characters must be a punctuation character or a halfspace.
	// A halfspace triggers this parser when the character is any space
	// character or the head of a line.
	Trigger() []byte

	// Parse parses the given block into an inline node.
	//
	// Parse can parse beyond the current line.
	// If Parse has been able to parse the current line, it must advance the
	// reader position by the consumed byte length.
	Parse(parent ast.Node, block text.Reader, pc Context) ast.Node
}
   546  
// A CloseBlocker interface is a callback that will be called when a block
// is closed during inline parsing.
type CloseBlocker interface {
	// CloseBlock will be called when a block is closed.
	CloseBlock(parent ast.Node, block text.Reader, pc Context)
}
   553  
// A ParagraphTransformer transforms parsed Paragraph nodes.
// For example, link references are searched for in parsed Paragraphs.
type ParagraphTransformer interface {
	// Transform transforms the given paragraph.
	Transform(node *ast.Paragraph, reader text.Reader, pc Context)
}
   560  
// An ASTTransformer transforms the AST of an entire Markdown document.
type ASTTransformer interface {
	// Transform transforms the given AST tree.
	Transform(node *ast.Document, reader text.Reader, pc Context)
}
   566  
// DefaultBlockParsers returns a new list of default BlockParsers.
// Each call constructs fresh parser instances.
// Priorities of default BlockParsers are:
//
//	SetextHeadingParser, 100
//	ThematicBreakParser, 200
//	ListParser, 300
//	ListItemParser, 400
//	CodeBlockParser, 500
//	ATXHeadingParser, 600
//	FencedCodeBlockParser, 700
//	BlockquoteParser, 800
//	HTMLBlockParser, 900
//	ParagraphParser, 1000
func DefaultBlockParsers() []util.PrioritizedValue {
	return []util.PrioritizedValue{
		util.Prioritized(NewSetextHeadingParser(), 100),
		util.Prioritized(NewThematicBreakParser(), 200),
		util.Prioritized(NewListParser(), 300),
		util.Prioritized(NewListItemParser(), 400),
		util.Prioritized(NewCodeBlockParser(), 500),
		util.Prioritized(NewATXHeadingParser(), 600),
		util.Prioritized(NewFencedCodeBlockParser(), 700),
		util.Prioritized(NewBlockquoteParser(), 800),
		util.Prioritized(NewHTMLBlockParser(), 900),
		util.Prioritized(NewParagraphParser(), 1000),
	}
}
   594  
// DefaultInlineParsers returns a new list of default InlineParsers.
// Each call constructs fresh parser instances.
// Priorities of default InlineParsers are:
//
//	CodeSpanParser, 100
//	LinkParser, 200
//	AutoLinkParser, 300
//	RawHTMLParser, 400
//	EmphasisParser, 500
func DefaultInlineParsers() []util.PrioritizedValue {
	return []util.PrioritizedValue{
		util.Prioritized(NewCodeSpanParser(), 100),
		util.Prioritized(NewLinkParser(), 200),
		util.Prioritized(NewAutoLinkParser(), 300),
		util.Prioritized(NewRawHTMLParser(), 400),
		util.Prioritized(NewEmphasisParser(), 500),
	}
}
   612  
// DefaultParagraphTransformers returns a new list of default ParagraphTransformers.
// Priorities of default ParagraphTransformers are:
//
//	LinkReferenceParagraphTransformer, 100
func DefaultParagraphTransformers() []util.PrioritizedValue {
	return []util.PrioritizedValue{
		util.Prioritized(LinkReferenceParagraphTransformer, 100),
	}
}
   622  
// A Block struct holds a node and its corresponding parser as a pair.
type Block struct {
	// Node is a BlockNode.
	Node ast.Node
	// Parser is the BlockParser that handles Node.
	Parser BlockParser
}
   630  
// parser is the Parser implementation returned by NewParser.
type parser struct {
	options map[OptionName]interface{}
	// blockParsers is indexed by trigger byte; freeBlockParsers holds the
	// block parsers whose Trigger returned nil.
	blockParsers     [256][]BlockParser
	freeBlockParsers []BlockParser
	// inlineParsers is indexed by trigger byte.
	inlineParsers [256][]InlineParser
	// closeBlockers holds the inline parsers that also implement CloseBlocker.
	closeBlockers         []CloseBlocker
	paragraphTransformers []ParagraphTransformer
	astTransformers       []ASTTransformer
	escapedSpace          bool
	// config is consumed on the first Parse call and set to nil afterwards;
	// initSync guards that one-time initialization.
	config   *Config
	initSync sync.Once
}
   643  
// withBlockParsers is the Option returned by WithBlockParsers.
type withBlockParsers struct {
	value []util.PrioritizedValue
}

// SetParserOption appends the stored values to the configured BlockParsers.
func (o *withBlockParsers) SetParserOption(c *Config) {
	c.BlockParsers = append(c.BlockParsers, o.value...)
}

// WithBlockParsers is a functional option that allows you to add
// BlockParsers to the parser.
func WithBlockParsers(bs ...util.PrioritizedValue) Option {
	return &withBlockParsers{bs}
}
   657  
// withInlineParsers is the Option returned by WithInlineParsers.
type withInlineParsers struct {
	value []util.PrioritizedValue
}

// SetParserOption appends the stored values to the configured InlineParsers.
func (o *withInlineParsers) SetParserOption(c *Config) {
	c.InlineParsers = append(c.InlineParsers, o.value...)
}

// WithInlineParsers is a functional option that allows you to add
// InlineParsers to the parser.
func WithInlineParsers(bs ...util.PrioritizedValue) Option {
	return &withInlineParsers{bs}
}
   671  
// withParagraphTransformers is the Option returned by WithParagraphTransformers.
type withParagraphTransformers struct {
	value []util.PrioritizedValue
}

// SetParserOption appends the stored values to the configured
// ParagraphTransformers.
func (o *withParagraphTransformers) SetParserOption(c *Config) {
	c.ParagraphTransformers = append(c.ParagraphTransformers, o.value...)
}

// WithParagraphTransformers is a functional option that allows you to add
// ParagraphTransformers to the parser.
func WithParagraphTransformers(ps ...util.PrioritizedValue) Option {
	return &withParagraphTransformers{ps}
}
   685  
// withASTTransformers is the Option returned by WithASTTransformers.
type withASTTransformers struct {
	value []util.PrioritizedValue
}

// SetParserOption appends the stored values to the configured ASTTransformers.
func (o *withASTTransformers) SetParserOption(c *Config) {
	c.ASTTransformers = append(c.ASTTransformers, o.value...)
}

// WithASTTransformers is a functional option that allows you to add
// ASTTransformers to the parser.
func WithASTTransformers(ps ...util.PrioritizedValue) Option {
	return &withASTTransformers{ps}
}
   699  
// withEscapedSpace is the Option returned by WithEscapedSpace.
type withEscapedSpace struct {
}

// SetParserOption enables the EscapedSpace setting of the configuration.
func (o *withEscapedSpace) SetParserOption(c *Config) {
	c.EscapedSpace = true
}

// WithEscapedSpace is a functional option that indicates that a '\' escaped
// half-space (0x20) should not trigger parsers.
func WithEscapedSpace() Option {
	return &withEscapedSpace{}
}
   711  
// withOption is the Option returned by WithOption; it carries one named value.
type withOption struct {
	name  OptionName
	value interface{}
}

// SetParserOption stores the value under its name in the configuration's
// Options map, replacing any previous value for that name.
func (o *withOption) SetParserOption(c *Config) {
	c.Options[o.name] = o.value
}

// WithOption is a functional option that allows you to set
// an arbitrary option to the parser.
func WithOption(name OptionName, value interface{}) Option {
	return &withOption{name, value}
}
   726  
   727  // NewParser returns a new Parser with given options.
   728  func NewParser(options ...Option) Parser {
   729  	config := NewConfig()
   730  	for _, opt := range options {
   731  		opt.SetParserOption(config)
   732  	}
   733  
   734  	p := &parser{
   735  		options: map[OptionName]interface{}{},
   736  		config:  config,
   737  	}
   738  
   739  	return p
   740  }
   741  
   742  func (p *parser) AddOptions(opts ...Option) {
   743  	for _, opt := range opts {
   744  		opt.SetParserOption(p.config)
   745  	}
   746  }
   747  
   748  func (p *parser) addBlockParser(v util.PrioritizedValue, options map[OptionName]interface{}) {
   749  	bp, ok := v.Value.(BlockParser)
   750  	if !ok {
   751  		panic(fmt.Sprintf("%v is not a BlockParser", v.Value))
   752  	}
   753  	tcs := bp.Trigger()
   754  	so, ok := v.Value.(SetOptioner)
   755  	if ok {
   756  		for oname, ovalue := range options {
   757  			so.SetOption(oname, ovalue)
   758  		}
   759  	}
   760  	if tcs == nil {
   761  		p.freeBlockParsers = append(p.freeBlockParsers, bp)
   762  	} else {
   763  		for _, tc := range tcs {
   764  			if p.blockParsers[tc] == nil {
   765  				p.blockParsers[tc] = []BlockParser{}
   766  			}
   767  			p.blockParsers[tc] = append(p.blockParsers[tc], bp)
   768  		}
   769  	}
   770  }
   771  
   772  func (p *parser) addInlineParser(v util.PrioritizedValue, options map[OptionName]interface{}) {
   773  	ip, ok := v.Value.(InlineParser)
   774  	if !ok {
   775  		panic(fmt.Sprintf("%v is not a InlineParser", v.Value))
   776  	}
   777  	tcs := ip.Trigger()
   778  	so, ok := v.Value.(SetOptioner)
   779  	if ok {
   780  		for oname, ovalue := range options {
   781  			so.SetOption(oname, ovalue)
   782  		}
   783  	}
   784  	if cb, ok := ip.(CloseBlocker); ok {
   785  		p.closeBlockers = append(p.closeBlockers, cb)
   786  	}
   787  	for _, tc := range tcs {
   788  		if p.inlineParsers[tc] == nil {
   789  			p.inlineParsers[tc] = []InlineParser{}
   790  		}
   791  		p.inlineParsers[tc] = append(p.inlineParsers[tc], ip)
   792  	}
   793  }
   794  
   795  func (p *parser) addParagraphTransformer(v util.PrioritizedValue, options map[OptionName]interface{}) {
   796  	pt, ok := v.Value.(ParagraphTransformer)
   797  	if !ok {
   798  		panic(fmt.Sprintf("%v is not a ParagraphTransformer", v.Value))
   799  	}
   800  	so, ok := v.Value.(SetOptioner)
   801  	if ok {
   802  		for oname, ovalue := range options {
   803  			so.SetOption(oname, ovalue)
   804  		}
   805  	}
   806  	p.paragraphTransformers = append(p.paragraphTransformers, pt)
   807  }
   808  
   809  func (p *parser) addASTTransformer(v util.PrioritizedValue, options map[OptionName]interface{}) {
   810  	at, ok := v.Value.(ASTTransformer)
   811  	if !ok {
   812  		panic(fmt.Sprintf("%v is not a ASTTransformer", v.Value))
   813  	}
   814  	so, ok := v.Value.(SetOptioner)
   815  	if ok {
   816  		for oname, ovalue := range options {
   817  			so.SetOption(oname, ovalue)
   818  		}
   819  	}
   820  	p.astTransformers = append(p.astTransformers, at)
   821  }
   822  
// A ParseConfig struct is a data structure that holds the configuration of
// a single Parser.Parse call.
type ParseConfig struct {
	// Context is the parse context to use; Parse creates a new one when nil.
	Context Context
}
   827  
// A ParseOption is a functional option type for Parser.Parse.
type ParseOption func(c *ParseConfig)
   830  
// WithContext is a functional option that allows you to override
// the default context.
func WithContext(context Context) ParseOption {
	return func(c *ParseConfig) {
		c.Context = context
	}
}
   838  
// Parse parses the text provided by reader and returns the root document node.
//
// On the first call the parser is initialized from its configuration: every
// component list is sorted and registered, after which the configuration is
// released. A new Context is created unless one is supplied via WithContext.
func (p *parser) Parse(reader text.Reader, opts ...ParseOption) ast.Node {
	p.initSync.Do(func() {
		p.config.BlockParsers.Sort()
		for _, v := range p.config.BlockParsers {
			p.addBlockParser(v, p.config.Options)
		}
		// Every trigger byte that has parsers also gets the free (trigger-less)
		// block parsers appended after its own parsers.
		for i := range p.blockParsers {
			if p.blockParsers[i] != nil {
				p.blockParsers[i] = append(p.blockParsers[i], p.freeBlockParsers...)
			}
		}

		p.config.InlineParsers.Sort()
		for _, v := range p.config.InlineParsers {
			p.addInlineParser(v, p.config.Options)
		}
		p.config.ParagraphTransformers.Sort()
		for _, v := range p.config.ParagraphTransformers {
			p.addParagraphTransformer(v, p.config.Options)
		}
		p.config.ASTTransformers.Sort()
		for _, v := range p.config.ASTTransformers {
			p.addASTTransformer(v, p.config.Options)
		}
		p.escapedSpace = p.config.EscapedSpace
		// The configuration is not needed after initialization.
		p.config = nil
	})
	c := &ParseConfig{}
	for _, opt := range opts {
		opt(c)
	}
	if c.Context == nil {
		c.Context = NewContext()
	}
	pc := c.Context
	root := ast.NewDocument()
	// First pass: build the block structure under root.
	p.parseBlocks(root, reader, pc)

	// Second pass: visit the blocks under root and run parseBlock on each,
	// using a block reader over the same source.
	blockReader := text.NewBlockReader(reader.Source(), nil)
	p.walkBlock(root, func(node ast.Node) {
		p.parseBlock(blockReader, node, pc)
	})
	// Finally, let every AST transformer process the whole document in order.
	for _, at := range p.astTransformers {
		at.Transform(root, reader, pc)
	}
	// root.Dump(reader.Source(), 0)
	return root
}
   887  
   888  func (p *parser) transformParagraph(node *ast.Paragraph, reader text.Reader, pc Context) bool {
   889  	for _, pt := range p.paragraphTransformers {
   890  		pt.Transform(node, reader, pc)
   891  		if node.Parent() == nil {
   892  			return true
   893  		}
   894  	}
   895  	return false
   896  }
   897  
   898  func (p *parser) closeBlocks(from, to int, reader text.Reader, pc Context) {
   899  	blocks := pc.OpenedBlocks()
   900  	for i := from; i >= to; i-- {
   901  		node := blocks[i].Node
   902  		paragraph, ok := node.(*ast.Paragraph)
   903  		if ok && node.Parent() != nil {
   904  			p.transformParagraph(paragraph, reader, pc)
   905  		}
   906  		if node.Parent() != nil { // closes only if node has not been transformed
   907  			blocks[i].Parser.Close(blocks[i].Node, reader, pc)
   908  		}
   909  	}
   910  	if from == len(blocks)-1 {
   911  		blocks = blocks[0:to]
   912  	} else {
   913  		blocks = append(blocks[0:to], blocks[from+1:]...)
   914  	}
   915  	pc.SetOpenedBlocks(blocks)
   916  }
   917  
// blockOpenResult is the outcome reported by openBlocks for a single line.
type blockOpenResult int

const (
	// paragraphContinuation means that no new block was opened and the last
	// opened paragraph accepted the line as a continuation.
	paragraphContinuation blockOpenResult = iota + 1
	// newBlocksOpened means that at least one new block was opened.
	newBlocksOpened
	// noBlocksOpened means that no block was opened and the line did not
	// continue a paragraph either.
	noBlocksOpened
)
   925  
// openBlocks tries to open new blocks under parent, starting at the reader's
// current line.
//
// Candidate block parsers are looked up by the byte at the position returned
// by util.IndentWidth, falling back to p.freeBlockParsers. The first parser
// whose Open returns a node wins: the node is appended to parent and pushed
// onto the context's opened blocks. If that parser reports HasChildren, the
// search is repeated with the new node as parent.
//
// blankLine is stored on every newly opened node via SetBlankPreviousLines.
//
// It returns newBlocksOpened if any block was opened, paragraphContinuation if
// none was opened but the last opened paragraph continued onto this line, and
// noBlocksOpened otherwise.
func (p *parser) openBlocks(parent ast.Node, blankLine bool, reader text.Reader, pc Context) blockOpenResult {
	result := blockOpenResult(noBlocksOpened)
	// continuable is true while the last opened block is a paragraph that may
	// absorb this line.
	continuable := false
	lastBlock := pc.LastOpenedBlock()
	if lastBlock.Node != nil {
		continuable = ast.IsParagraph(lastBlock.Node)
	}
retry:
	var bps []BlockParser
	line, _ := reader.PeekLine()
	w, pos := util.IndentWidth(line, reader.LineOffset())
	// Publish the indent width and offset of this line to the block parsers;
	// -1 is used when the indent width reaches the length of the line.
	if w >= len(line) {
		pc.SetBlockOffset(-1)
		pc.SetBlockIndent(-1)
	} else {
		pc.SetBlockOffset(pos)
		pc.SetBlockIndent(w)
	}
	// No line, or a line starting with a newline: no parser can open a block.
	if line == nil || line[0] == '\n' {
		goto continuable
	}
	// Prefer the parsers registered for the byte at pos; otherwise use the
	// free block parsers.
	bps = p.freeBlockParsers
	if pos < len(line) {
		bps = p.blockParsers[line[pos]]
		if bps == nil {
			bps = p.freeBlockParsers
		}
	}
	if bps == nil {
		goto continuable
	}

	for _, bp := range bps {
		// While a paragraph is open and nothing has been opened on this line
		// yet, only parsers that can interrupt a paragraph are tried.
		if continuable && result == noBlocksOpened && !bp.CanInterruptParagraph() {
			continue
		}
		// Lines indented by more than 3 columns are offered only to parsers
		// that accept indented lines.
		if w > 3 && !bp.CanAcceptIndentedLine() {
			continue
		}
		lastBlock = pc.LastOpenedBlock()
		last := lastBlock.Node
		node, state := bp.Open(parent, reader, pc)
		if node != nil {
			// Parser requires last node to be a paragraph.
			// With table extension:
			//
			//     0
			//     -:
			//     -
			//
			// '-' on 3rd line seems a Setext heading because 1st and 2nd lines
			// are being paragraph when the Setext heading parser tries to parse the 3rd
			// line.
			// But 1st line and 2nd line are a table. Thus this paragraph will be transformed
			// by a paragraph transformer. So this text should be converted to a table and
			// an empty list.
			if state&RequireParagraph != 0 {
				if last == parent.LastChild() {
					// Opened paragraph may be transformed by ParagraphTransformers in
					// closeBlocks().
					// Close the paragraph and pop it from the opened blocks
					// before running the transformers on it.
					lastBlock.Parser.Close(last, reader, pc)
					blocks := pc.OpenedBlocks()
					pc.SetOpenedBlocks(blocks[0 : len(blocks)-1])
					if p.transformParagraph(last.(*ast.Paragraph), reader, pc) {
						// Paragraph has been transformed.
						// So this parser is considered as failing.
						continuable = false
						goto retry
					}
				}
			}
			node.SetBlankPreviousLines(blankLine)
			// The previously last node no longer has a parent: close the
			// block that is currently last in the opened-block list.
			if last != nil && last.Parent() == nil {
				lastPos := len(pc.OpenedBlocks()) - 1
				p.closeBlocks(lastPos, lastPos, reader, pc)
			}
			parent.AppendChild(parent, node)
			result = newBlocksOpened
			be := Block{node, bp}
			pc.SetOpenedBlocks(append(pc.OpenedBlocks(), be))
			if state&HasChildren != 0 {
				parent = node
				goto retry // try child block
			}
			break // no children, can not open more blocks on this line
		}
	}

continuable:
	// Nothing was opened: let the last opened paragraph decide whether this
	// line continues it.
	if result == noBlocksOpened && continuable {
		state := lastBlock.Parser.Continue(lastBlock.Node, reader, pc)
		if state&Continue != 0 {
			result = paragraphContinuation
		}
	}
	return result
}
  1023  
  1024  type lineStat struct {
  1025  	lineNum int
  1026  	level   int
  1027  	isBlank bool
  1028  }
  1029  
  1030  func isBlankLine(lineNum, level int, stats []lineStat) bool {
  1031  	ret := true
  1032  	for i := len(stats) - 1 - level; i >= 0; i-- {
  1033  		ret = false
  1034  		s := stats[i]
  1035  		if s.lineNum == lineNum {
  1036  			if s.level < level && s.isBlank {
  1037  				return true
  1038  			} else if s.level == level {
  1039  				return s.isBlank
  1040  			}
  1041  		}
  1042  		if s.lineNum < lineNum {
  1043  			return ret
  1044  		}
  1045  	}
  1046  	return ret
  1047  }
  1048  
// parseBlocks builds the block structure under parent by consuming the reader
// line by line.
//
// The outer loop skips blank lines and opens the first block(s) of a group;
// the inner loop then offers each following line to the opened blocks, from
// the outermost to the innermost, continuing, opening or closing blocks as
// needed. The function returns when the reader has no more lines or when no
// block can be opened after a run of blank lines.
func (p *parser) parseBlocks(parent ast.Node, reader text.Reader, pc Context) {
	pc.SetOpenedBlocks([]Block{})
	// blankLines collects per-line, per-level blankness records consulted by
	// isBlankLine.
	blankLines := make([]lineStat, 0, 128)
	isBlank := false
	for { // process blocks separated by blank lines
		_, lines, ok := reader.SkipBlankLines()
		if !ok {
			return
		}
		lineNum, _ := reader.Position()
		if lines != 0 {
			// Blank lines were skipped: restart the records and mark the
			// previous line as blank for every currently opened block.
			blankLines = blankLines[0:0]
			l := len(pc.OpenedBlocks())
			for i := 0; i < l; i++ {
				blankLines = append(blankLines, lineStat{lineNum - 1, i, lines != 0})
			}
		}
		isBlank = isBlankLine(lineNum-1, 0, blankLines)
		// first, we try to open blocks
		if p.openBlocks(parent, isBlank, reader, pc) != newBlocksOpened {
			return
		}
		reader.AdvanceLine()
		for { // process opened blocks line by line
			openedBlocks := pc.OpenedBlocks()
			l := len(openedBlocks)
			if l == 0 {
				break
			}
			lastIndex := l - 1
			for i := 0; i < l; i++ {
				be := openedBlocks[i]
				line, _ := reader.PeekLine()
				if line == nil {
					// No more lines: close every opened block and stop.
					p.closeBlocks(lastIndex, 0, reader, pc)
					reader.AdvanceLine()
					return
				}
				lineNum, _ := reader.Position()
				blankLines = append(blankLines, lineStat{lineNum, i, util.IsBlank(line)})
				// If node is a paragraph, p.openBlocks determines whether it is continuable.
				// So we do not process paragraphs here.
				if !ast.IsParagraph(be.Node) {
					state := be.Parser.Continue(be.Node, reader, pc)
					if state&Continue != 0 {
						// When current node is a container block and has no children,
						// we try to open new child nodes
						if state&HasChildren != 0 && i == lastIndex {
							isBlank = isBlankLine(lineNum-1, i, blankLines)
							p.openBlocks(be.Node, isBlank, reader, pc)
							break
						}
						continue
					}
				}
				// current node may be closed or lazy continuation
				isBlank = isBlankLine(lineNum-1, i, blankLines)
				// New blocks are opened under the block enclosing the current
				// one, or under parent for the outermost block.
				thisParent := parent
				if i != 0 {
					thisParent = openedBlocks[i-1].Node
				}
				lastNode := openedBlocks[lastIndex].Node
				result := p.openBlocks(thisParent, isBlank, reader, pc)
				if result != paragraphContinuation {
					// lastNode is a paragraph and was transformed by the paragraph
					// transformers.
					if openedBlocks[lastIndex].Node != lastNode {
						lastIndex--
					}
					p.closeBlocks(lastIndex, i, reader, pc)
				}
				break
			}

			reader.AdvanceLine()
		}
	}
}
  1127  
  1128  func (p *parser) walkBlock(block ast.Node, cb func(node ast.Node)) {
  1129  	for c := block.FirstChild(); c != nil; c = c.NextSibling() {
  1130  		p.walkBlock(c, cb)
  1131  	}
  1132  	cb(block)
  1133  }
  1134  
// Bit flags describing how a line of inline content ends. They are combined
// in parseBlock.
const (
	// lineBreakHard marks a line that ends with a hard line break.
	lineBreakHard uint8 = 1 << iota
	// lineBreakSoft marks a line that ends with a newline that is not a hard
	// line break.
	lineBreakSoft
	// lineBreakVisible is set together with lineBreakHard when the hard line
	// break is written as a trailing backslash.
	lineBreakVisible
)
  1140  
// parseBlock parses the inline content of a single block node.
//
// It scans the lines of parent, hands control to the inline parsers
// registered for trigger characters, and appends the resulting inline nodes
// and the plain text between them as children of parent. Raw blocks are left
// untouched. After all lines are consumed, pending delimiters are processed
// and every registered close blocker is invoked for parent.
func (p *parser) parseBlock(block text.BlockReader, parent ast.Node, pc Context) {
	if parent.IsRaw() {
		return
	}
	// escaped is true when the previous byte was an unescaped backslash.
	escaped := false
	source := block.Source()
	block.Reset(parent.Lines())
	for {
	retry:
		line, _ := block.PeekLine()
		if line == nil {
			break
		}
		lineLength := len(line)
		var lineBreakFlags uint8 = 0
		hasNewLine := line[lineLength-1] == '\n'
		// Classify the line ending and shorten lineLength so that the bytes
		// forming the line break are not scanned as inline content.
		if ((lineLength >= 3 && line[lineLength-2] == '\\' && line[lineLength-3] != '\\') || (lineLength == 2 && line[lineLength-2] == '\\')) && hasNewLine { // ends with \\n
			lineLength -= 2
			lineBreakFlags |= lineBreakHard | lineBreakVisible
		} else if ((lineLength >= 4 && line[lineLength-3] == '\\' && line[lineLength-2] == '\r' && line[lineLength-4] != '\\') || (lineLength == 3 && line[lineLength-3] == '\\' && line[lineLength-2] == '\r')) && hasNewLine { // ends with \\r\n
			lineLength -= 3
			lineBreakFlags |= lineBreakHard | lineBreakVisible
		} else if lineLength >= 3 && line[lineLength-3] == ' ' && line[lineLength-2] == ' ' && hasNewLine { // ends with [space][space]\n
			lineLength -= 3
			lineBreakFlags |= lineBreakHard
		} else if lineLength >= 4 && line[lineLength-4] == ' ' && line[lineLength-3] == ' ' && line[lineLength-2] == '\r' && hasNewLine { // ends with [space][space]\r\n
			lineLength -= 4
			lineBreakFlags |= lineBreakHard
		} else if hasNewLine {
			// If the line ends with a newline character, but it is not a hardlineBreak, then it is a softLinebreak
			// If the line ends with a hardlineBreak, then it cannot end with a softLinebreak
			// See https://spec.commonmark.org/0.30/#soft-line-breaks
			lineBreakFlags |= lineBreakSoft
		}

		l, startPosition := block.Position()
		// n counts the bytes scanned since the reader was last advanced.
		n := 0
		for i := 0; i < lineLength; i++ {
			c := line[i]
			if c == '\n' {
				break
			}
			isSpace := util.IsSpace(c) && c != '\r' && c != '\n'
			isPunct := util.IsPunct(c)
			// Inline parsers are consulted at unescaped punctuation, at
			// whitespace (unless it is escaped and escapedSpace is enabled),
			// and at the first byte of the line.
			if (isPunct && !escaped) || isSpace && !(escaped && p.escapedSpace) || i == 0 {
				// Whitespace and a non-punctuation first byte are looked up
				// under the ' ' key.
				parserChar := c
				if isSpace || (i == 0 && !isPunct) {
					parserChar = ' '
				}
				ips := p.inlineParsers[parserChar]
				if ips != nil {
					block.Advance(n)
					n = 0
					savedLine, savedPosition := block.Position()
					if i != 0 {
						// Emit the text scanned so far before trying the
						// inline parsers.
						_, currentPosition := block.Position()
						ast.MergeOrAppendTextSegment(parent, startPosition.Between(currentPosition))
						_, startPosition = block.Position()
					}
					var inlineNode ast.Node
					for _, ip := range ips {
						inlineNode = ip.Parse(parent, block, pc)
						if inlineNode != nil {
							break
						}
						// This parser produced nothing: restore the reader
						// position before trying the next one.
						block.SetPosition(savedLine, savedPosition)
					}
					if inlineNode != nil {
						parent.AppendChild(parent, inlineNode)
						// Re-read the line from the reader's new position.
						goto retry
					}
				}
			}
			if escaped {
				escaped = false
				n++
				continue
			}

			if c == '\\' {
				escaped = true
				n++
				continue
			}

			escaped = false
			n++
		}
		if n != 0 {
			block.Advance(n)
		}
		currentL, currentPosition := block.Position()
		// The reader is no longer on the line that was scanned: start over
		// with whatever line it is on now.
		if l != currentL {
			continue
		}
		diff := startPosition.Between(currentPosition)
		var text *ast.Text
		// Trailing spaces are kept only when the line ends with a visible
		// (backslash) hard line break; otherwise they are trimmed.
		if lineBreakFlags&(lineBreakHard|lineBreakVisible) == lineBreakHard|lineBreakVisible {
			text = ast.NewTextSegment(diff)
		} else {
			text = ast.NewTextSegment(diff.TrimRightSpace(source))
		}
		text.SetSoftLineBreak(lineBreakFlags&lineBreakSoft != 0)
		text.SetHardLineBreak(lineBreakFlags&lineBreakHard != 0)
		parent.AppendChild(parent, text)
		block.AdvanceLine()
	}

	ProcessDelimiters(nil, pc)
	for _, ip := range p.closeBlockers {
		ip.CloseBlock(parent, block, pc)
	}
}
  1254  

View as plain text