...

Source file src/github.com/yuin/goldmark/parser/html_block.go

Documentation: github.com/yuin/goldmark/parser

     1  package parser
     2  
     3  import (
     4  	"bytes"
     5  	"regexp"
     6  	"strings"
     7  
     8  	"github.com/yuin/goldmark/ast"
     9  	"github.com/yuin/goldmark/text"
    10  	"github.com/yuin/goldmark/util"
    11  )
    12  
// allowedBlockTags is the set of tag names that may start a type 6 HTML
// block. Keys are lower-case; htmlBlockParser.Open lower-cases the tag name
// found on the line before looking it up here, and only tests for the
// presence of the key.
var allowedBlockTags = map[string]bool{
	"address":    true,
	"article":    true,
	"aside":      true,
	"base":       true,
	"basefont":   true,
	"blockquote": true,
	"body":       true,
	"caption":    true,
	"center":     true,
	"col":        true,
	"colgroup":   true,
	"dd":         true,
	"details":    true,
	"dialog":     true,
	"dir":        true,
	"div":        true,
	"dl":         true,
	"dt":         true,
	"fieldset":   true,
	"figcaption": true,
	"figure":     true,
	"footer":     true,
	"form":       true,
	"frame":      true,
	"frameset":   true,
	"h1":         true,
	"h2":         true,
	"h3":         true,
	"h4":         true,
	"h5":         true,
	"h6":         true,
	"head":       true,
	"header":     true,
	"hr":         true,
	"html":       true,
	"iframe":     true,
	"legend":     true,
	"li":         true,
	"link":       true,
	"main":       true,
	"menu":       true,
	"menuitem":   true,
	"meta":       true,
	"nav":        true,
	"noframes":   true,
	"ol":         true,
	"optgroup":   true,
	"option":     true,
	"p":          true,
	"param":      true,
	"section":    true,
	"source":     true,
	"summary":    true,
	"table":      true,
	"tbody":      true,
	"td":         true,
	"tfoot":      true,
	"th":         true,
	"thead":      true,
	"title":      true,
	"tr":         true,
	"track":      true,
	"ul":         true,
}
    78  
    79  var htmlBlockType1OpenRegexp = regexp.MustCompile(`(?i)^[ ]{0,3}<(script|pre|style|textarea)(?:\s.*|>.*|/>.*|)(?:\r\n|\n)?$`)
    80  var htmlBlockType1CloseRegexp = regexp.MustCompile(`(?i)^.*</(?:script|pre|style|textarea)>.*`)
    81  
    82  var htmlBlockType2OpenRegexp = regexp.MustCompile(`^[ ]{0,3}<!\-\-`)
    83  var htmlBlockType2Close = []byte{'-', '-', '>'}
    84  
    85  var htmlBlockType3OpenRegexp = regexp.MustCompile(`^[ ]{0,3}<\?`)
    86  var htmlBlockType3Close = []byte{'?', '>'}
    87  
    88  var htmlBlockType4OpenRegexp = regexp.MustCompile(`^[ ]{0,3}<![A-Z]+.*(?:\r\n|\n)?$`)
    89  var htmlBlockType4Close = []byte{'>'}
    90  
    91  var htmlBlockType5OpenRegexp = regexp.MustCompile(`^[ ]{0,3}<\!\[CDATA\[`)
    92  var htmlBlockType5Close = []byte{']', ']', '>'}
    93  
    94  var htmlBlockType6Regexp = regexp.MustCompile(`^[ ]{0,3}<(?:/[ ]*)?([a-zA-Z]+[a-zA-Z0-9\-]*)(?:[ ].*|>.*|/>.*|)(?:\r\n|\n)?$`)
    95  
// htmlBlockType7Regexp matches a line that consists of at most three spaces,
// one complete open or closing tag, and then only spaces and an optional line
// ending. Capture group 1 is the "/" (plus any following spaces) of a closing
// tag, group 2 is the tag name, and group 3 is the run of attributes as
// described by attributePattern, which is declared outside this listing.
var htmlBlockType7Regexp = regexp.MustCompile(`^[ ]{0,3}<(/[ ]*)?([a-zA-Z]+[a-zA-Z0-9\-]*)(` + attributePattern + `*)[ ]*(?:>|/>)[ ]*(?:\r\n|\n)?$`)
    97  
// htmlBlockParser is the block parser for raw HTML blocks. It has no fields,
// so it carries no per-instance state.
type htmlBlockParser struct {
}

// defaultHTMLBlockParser is the single instance handed out by
// NewHTMLBlockParser.
var defaultHTMLBlockParser = &htmlBlockParser{}
   102  
// NewHTMLBlockParser returns a BlockParser that can parse HTML blocks.
// Every call returns the same package-level instance.
func NewHTMLBlockParser() BlockParser {
	return defaultHTMLBlockParser
}
   108  
// Trigger returns the bytes associated with this parser: only '<'.
// NOTE(review): presumably the parser framework uses this list to decide on
// which leading byte Open should be attempted — confirm against the
// BlockParser interface.
func (b *htmlBlockParser) Trigger() []byte {
	return []byte{'<'}
}
   112  
   113  func (b *htmlBlockParser) Open(parent ast.Node, reader text.Reader, pc Context) (ast.Node, State) {
   114  	var node *ast.HTMLBlock
   115  	line, segment := reader.PeekLine()
   116  	last := pc.LastOpenedBlock().Node
   117  	if pos := pc.BlockOffset(); pos < 0 || line[pos] != '<' {
   118  		return nil, NoChildren
   119  	}
   120  
   121  	if m := htmlBlockType1OpenRegexp.FindSubmatchIndex(line); m != nil {
   122  		node = ast.NewHTMLBlock(ast.HTMLBlockType1)
   123  	} else if htmlBlockType2OpenRegexp.Match(line) {
   124  		node = ast.NewHTMLBlock(ast.HTMLBlockType2)
   125  	} else if htmlBlockType3OpenRegexp.Match(line) {
   126  		node = ast.NewHTMLBlock(ast.HTMLBlockType3)
   127  	} else if htmlBlockType4OpenRegexp.Match(line) {
   128  		node = ast.NewHTMLBlock(ast.HTMLBlockType4)
   129  	} else if htmlBlockType5OpenRegexp.Match(line) {
   130  		node = ast.NewHTMLBlock(ast.HTMLBlockType5)
   131  	} else if match := htmlBlockType7Regexp.FindSubmatchIndex(line); match != nil {
   132  		isCloseTag := match[2] > -1 && bytes.Equal(line[match[2]:match[3]], []byte("/"))
   133  		hasAttr := match[6] != match[7]
   134  		tagName := strings.ToLower(string(line[match[4]:match[5]]))
   135  		_, ok := allowedBlockTags[tagName]
   136  		if ok {
   137  			node = ast.NewHTMLBlock(ast.HTMLBlockType6)
   138  		} else if tagName != "script" && tagName != "style" && tagName != "pre" && !ast.IsParagraph(last) && !(isCloseTag && hasAttr) { // type 7 can not interrupt paragraph
   139  			node = ast.NewHTMLBlock(ast.HTMLBlockType7)
   140  		}
   141  	}
   142  	if node == nil {
   143  		if match := htmlBlockType6Regexp.FindSubmatchIndex(line); match != nil {
   144  			tagName := string(line[match[2]:match[3]])
   145  			_, ok := allowedBlockTags[strings.ToLower(tagName)]
   146  			if ok {
   147  				node = ast.NewHTMLBlock(ast.HTMLBlockType6)
   148  			}
   149  		}
   150  	}
   151  	if node != nil {
   152  		reader.Advance(segment.Len() - 1)
   153  		node.Lines().Append(segment)
   154  		return node, NoChildren
   155  	}
   156  	return nil, NoChildren
   157  }
   158  
   159  func (b *htmlBlockParser) Continue(node ast.Node, reader text.Reader, pc Context) State {
   160  	htmlBlock := node.(*ast.HTMLBlock)
   161  	lines := htmlBlock.Lines()
   162  	line, segment := reader.PeekLine()
   163  	var closurePattern []byte
   164  
   165  	switch htmlBlock.HTMLBlockType {
   166  	case ast.HTMLBlockType1:
   167  		if lines.Len() == 1 {
   168  			firstLine := lines.At(0)
   169  			if htmlBlockType1CloseRegexp.Match(firstLine.Value(reader.Source())) {
   170  				return Close
   171  			}
   172  		}
   173  		if htmlBlockType1CloseRegexp.Match(line) {
   174  			htmlBlock.ClosureLine = segment
   175  			reader.Advance(segment.Len() - 1)
   176  			return Close
   177  		}
   178  	case ast.HTMLBlockType2:
   179  		closurePattern = htmlBlockType2Close
   180  		fallthrough
   181  	case ast.HTMLBlockType3:
   182  		if closurePattern == nil {
   183  			closurePattern = htmlBlockType3Close
   184  		}
   185  		fallthrough
   186  	case ast.HTMLBlockType4:
   187  		if closurePattern == nil {
   188  			closurePattern = htmlBlockType4Close
   189  		}
   190  		fallthrough
   191  	case ast.HTMLBlockType5:
   192  		if closurePattern == nil {
   193  			closurePattern = htmlBlockType5Close
   194  		}
   195  
   196  		if lines.Len() == 1 {
   197  			firstLine := lines.At(0)
   198  			if bytes.Contains(firstLine.Value(reader.Source()), closurePattern) {
   199  				return Close
   200  			}
   201  		}
   202  		if bytes.Contains(line, closurePattern) {
   203  			htmlBlock.ClosureLine = segment
   204  			reader.Advance(segment.Len())
   205  			return Close
   206  		}
   207  
   208  	case ast.HTMLBlockType6, ast.HTMLBlockType7:
   209  		if util.IsBlank(line) {
   210  			return Close
   211  		}
   212  	}
   213  	node.Lines().Append(segment)
   214  	reader.Advance(segment.Len() - 1)
   215  	return Continue | NoChildren
   216  }
   217  
// Close is a no-op: this parser does no work when an HTML block is closed.
func (b *htmlBlockParser) Close(node ast.Node, reader text.Reader, pc Context) {
	// nothing to do
}
   221  
// CanInterruptParagraph always reports true. Open still refuses to start a
// type 7 block when the last opened block is a paragraph, so the finer
// restriction is enforced there rather than here.
func (b *htmlBlockParser) CanInterruptParagraph() bool {
	return true
}
   225  
// CanAcceptIndentedLine always reports false.
// NOTE(review): presumably this tells the parser framework not to offer
// lines indented as code to this parser — confirm against the BlockParser
// interface.
func (b *htmlBlockParser) CanAcceptIndentedLine() bool {
	return false
}
   229  

View as plain text