...

Source file src/github.com/yuin/goldmark/parser/raw_html.go

Documentation: github.com/yuin/goldmark/parser

     1  package parser
     2  
     3  import (
     4  	"bytes"
     5  	"regexp"
     6  
     7  	"github.com/yuin/goldmark/ast"
     8  	"github.com/yuin/goldmark/text"
     9  	"github.com/yuin/goldmark/util"
    10  )
    11  
    12  type rawHTMLParser struct {
    13  }
    14  
    15  var defaultRawHTMLParser = &rawHTMLParser{}
    16  
    17  // NewRawHTMLParser return a new InlineParser that can parse
    18  // inline htmls
    19  func NewRawHTMLParser() InlineParser {
    20  	return defaultRawHTMLParser
    21  }
    22  
    23  func (s *rawHTMLParser) Trigger() []byte {
    24  	return []byte{'<'}
    25  }
    26  
    27  func (s *rawHTMLParser) Parse(parent ast.Node, block text.Reader, pc Context) ast.Node {
    28  	line, _ := block.PeekLine()
    29  	if len(line) > 1 && util.IsAlphaNumeric(line[1]) {
    30  		return s.parseMultiLineRegexp(openTagRegexp, block, pc)
    31  	}
    32  	if len(line) > 2 && line[1] == '/' && util.IsAlphaNumeric(line[2]) {
    33  		return s.parseMultiLineRegexp(closeTagRegexp, block, pc)
    34  	}
    35  	if bytes.HasPrefix(line, openComment) {
    36  		return s.parseComment(block, pc)
    37  	}
    38  	if bytes.HasPrefix(line, openProcessingInstruction) {
    39  		return s.parseUntil(block, closeProcessingInstruction, pc)
    40  	}
    41  	if len(line) > 2 && line[1] == '!' && line[2] >= 'A' && line[2] <= 'Z' {
    42  		return s.parseUntil(block, closeDecl, pc)
    43  	}
    44  	if bytes.HasPrefix(line, openCDATA) {
    45  		return s.parseUntil(block, closeCDATA, pc)
    46  	}
    47  	return nil
    48  }
    49  
    50  var tagnamePattern = `([A-Za-z][A-Za-z0-9-]*)`
    51  
    52  var attributePattern = `(?:[\r\n \t]+[a-zA-Z_:][a-zA-Z0-9:._-]*(?:[\r\n \t]*=[\r\n \t]*(?:[^\"'=<>` + "`" + `\x00-\x20]+|'[^']*'|"[^"]*"))?)`
    53  var openTagRegexp = regexp.MustCompile("^<" + tagnamePattern + attributePattern + `*[ \t]*/?>`)
    54  var closeTagRegexp = regexp.MustCompile("^</" + tagnamePattern + `\s*>`)
    55  
    56  var openProcessingInstruction = []byte("<?")
    57  var closeProcessingInstruction = []byte("?>")
    58  var openCDATA = []byte("<![CDATA[")
    59  var closeCDATA = []byte("]]>")
    60  var closeDecl = []byte(">")
    61  var emptyComment = []byte("<!---->")
    62  var invalidComment1 = []byte("<!-->")
    63  var invalidComment2 = []byte("<!--->")
    64  var openComment = []byte("<!--")
    65  var closeComment = []byte("-->")
    66  var doubleHyphen = []byte("--")
    67  
    68  func (s *rawHTMLParser) parseComment(block text.Reader, pc Context) ast.Node {
    69  	savedLine, savedSegment := block.Position()
    70  	node := ast.NewRawHTML()
    71  	line, segment := block.PeekLine()
    72  	if bytes.HasPrefix(line, emptyComment) {
    73  		node.Segments.Append(segment.WithStop(segment.Start + len(emptyComment)))
    74  		block.Advance(len(emptyComment))
    75  		return node
    76  	}
    77  	if bytes.HasPrefix(line, invalidComment1) || bytes.HasPrefix(line, invalidComment2) {
    78  		return nil
    79  	}
    80  	offset := len(openComment)
    81  	line = line[offset:]
    82  	for {
    83  		hindex := bytes.Index(line, doubleHyphen)
    84  		if hindex > -1 {
    85  			hindex += offset
    86  		}
    87  		index := bytes.Index(line, closeComment) + offset
    88  		if index > -1 && hindex == index {
    89  			if index == 0 || len(line) < 2 || line[index-offset-1] != '-' {
    90  				node.Segments.Append(segment.WithStop(segment.Start + index + len(closeComment)))
    91  				block.Advance(index + len(closeComment))
    92  				return node
    93  			}
    94  		}
    95  		if hindex > 0 {
    96  			break
    97  		}
    98  		node.Segments.Append(segment)
    99  		block.AdvanceLine()
   100  		line, segment = block.PeekLine()
   101  		offset = 0
   102  		if line == nil {
   103  			break
   104  		}
   105  	}
   106  	block.SetPosition(savedLine, savedSegment)
   107  	return nil
   108  }
   109  
   110  func (s *rawHTMLParser) parseUntil(block text.Reader, closer []byte, pc Context) ast.Node {
   111  	savedLine, savedSegment := block.Position()
   112  	node := ast.NewRawHTML()
   113  	for {
   114  		line, segment := block.PeekLine()
   115  		if line == nil {
   116  			break
   117  		}
   118  		index := bytes.Index(line, closer)
   119  		if index > -1 {
   120  			node.Segments.Append(segment.WithStop(segment.Start + index + len(closer)))
   121  			block.Advance(index + len(closer))
   122  			return node
   123  		}
   124  		node.Segments.Append(segment)
   125  		block.AdvanceLine()
   126  	}
   127  	block.SetPosition(savedLine, savedSegment)
   128  	return nil
   129  }
   130  
   131  func (s *rawHTMLParser) parseMultiLineRegexp(reg *regexp.Regexp, block text.Reader, pc Context) ast.Node {
   132  	sline, ssegment := block.Position()
   133  	if block.Match(reg) {
   134  		node := ast.NewRawHTML()
   135  		eline, esegment := block.Position()
   136  		block.SetPosition(sline, ssegment)
   137  		for {
   138  			line, segment := block.PeekLine()
   139  			if line == nil {
   140  				break
   141  			}
   142  			l, _ := block.Position()
   143  			start := segment.Start
   144  			if l == sline {
   145  				start = ssegment.Start
   146  			}
   147  			end := segment.Stop
   148  			if l == eline {
   149  				end = esegment.Start
   150  			}
   151  
   152  			node.Segments.Append(text.NewSegment(start, end))
   153  			if l == eline {
   154  				block.Advance(end - start)
   155  				break
   156  			} else {
   157  				block.AdvanceLine()
   158  			}
   159  		}
   160  		return node
   161  	}
   162  	return nil
   163  }
   164  

View as plain text