...

Source file src/github.com/yuin/goldmark/extension/typographer.go

Documentation: github.com/yuin/goldmark/extension

     1  package extension
     2  
     3  import (
     4  	"unicode"
     5  
     6  	"github.com/yuin/goldmark"
     7  	gast "github.com/yuin/goldmark/ast"
     8  	"github.com/yuin/goldmark/parser"
     9  	"github.com/yuin/goldmark/text"
    10  	"github.com/yuin/goldmark/util"
    11  )
    12  
// uncloseCounterKey is the parser.Context key under which getUnclosedCounter
// stores and looks up the *unclosedCounter.
var uncloseCounterKey = parser.NewContextKey()
    14  
// unclosedCounter tracks how many opening quotes have been emitted without a
// matching closing quote yet. Parse increments a field when it emits a left
// quote and decrements it when it emits the corresponding right quote.
type unclosedCounter struct {
	// Single is the number of currently unmatched left single quotes.
	Single int
	// Double is the number of currently unmatched left double quotes.
	Double int
}
    19  
    20  func (u *unclosedCounter) Reset() {
    21  	u.Single = 0
    22  	u.Double = 0
    23  }
    24  
    25  func getUnclosedCounter(pc parser.Context) *unclosedCounter {
    26  	v := pc.Get(uncloseCounterKey)
    27  	if v == nil {
    28  		v = &unclosedCounter{}
    29  		pc.Set(uncloseCounterKey, v)
    30  	}
    31  	return v.(*unclosedCounter)
    32  }
    33  
// TypographicPunctuation identifies a punctuation sequence that can be
// replaced with a typographic entity. Its values are used as indexes into
// TypographerConfig.Substitutions.
type TypographicPunctuation int
    37  
// Keys start at 1 (iota + 1), so index 0 of a substitution table is never
// assigned by newDefaultSubstitutions.
const (
	// LeftSingleQuote is a ' that opens a quotation.
	LeftSingleQuote TypographicPunctuation = iota + 1
	// RightSingleQuote is a ' that closes a quotation.
	RightSingleQuote
	// LeftDoubleQuote is a " that opens a quotation.
	LeftDoubleQuote
	// RightDoubleQuote is a " that closes a quotation.
	RightDoubleQuote
	// EnDash is --
	EnDash
	// EmDash is ---
	EmDash
	// Ellipsis is ...
	Ellipsis
	// LeftAngleQuote is <<
	LeftAngleQuote
	// RightAngleQuote is >>
	RightAngleQuote
	// Apostrophe is a ' used inside or in front of a word (it's, '90s).
	Apostrophe

	// typographicPunctuationMax is one past the last valid key. It is used as
	// the length of the substitution table.
	typographicPunctuationMax
)
    62  
// A TypographerConfig struct is a data structure that holds configuration of the
// Typographer extension.
type TypographerConfig struct {
	// Substitutions holds the replacement text for each punctuation, indexed
	// by TypographicPunctuation. The parser checks an entry for nil before
	// applying the corresponding substitution.
	Substitutions [][]byte
}
    68  
    69  func newDefaultSubstitutions() [][]byte {
    70  	replacements := make([][]byte, typographicPunctuationMax)
    71  	replacements[LeftSingleQuote] = []byte("&lsquo;")
    72  	replacements[RightSingleQuote] = []byte("&rsquo;")
    73  	replacements[LeftDoubleQuote] = []byte("&ldquo;")
    74  	replacements[RightDoubleQuote] = []byte("&rdquo;")
    75  	replacements[EnDash] = []byte("&ndash;")
    76  	replacements[EmDash] = []byte("&mdash;")
    77  	replacements[Ellipsis] = []byte("&hellip;")
    78  	replacements[LeftAngleQuote] = []byte("&laquo;")
    79  	replacements[RightAngleQuote] = []byte("&raquo;")
    80  	replacements[Apostrophe] = []byte("&rsquo;")
    81  
    82  	return replacements
    83  }
    84  
    85  // SetOption implements SetOptioner.
    86  func (b *TypographerConfig) SetOption(name parser.OptionName, value interface{}) {
    87  	switch name {
    88  	case optTypographicSubstitutions:
    89  		b.Substitutions = value.([][]byte)
    90  	}
    91  }
    92  
// A TypographerOption interface sets options for the TypographerParser.
// It is also a parser.Option, so the same value can be handed to the parser
// configuration.
type TypographerOption interface {
	parser.Option
	// SetTypographerOption applies the option to the given configuration.
	SetTypographerOption(*TypographerConfig)
}
    98  
// optTypographicSubstitutions is the option name under which the substitution
// table is stored in parser.Config.Options and recognized by
// TypographerConfig.SetOption.
const optTypographicSubstitutions parser.OptionName = "TypographicSubstitutions"
   100  
// TypographicSubstitutions maps each punctuation key to the replacement text
// used for it by the Typographer extension.
type TypographicSubstitutions map[TypographicPunctuation][]byte
   103  
// withTypographicSubstitutions is the TypographerOption returned by
// WithTypographicSubstitutions.
type withTypographicSubstitutions struct {
	// value is the complete substitution table, indexed by
	// TypographicPunctuation.
	value [][]byte
}
   107  
// SetParserOption stores the substitution table in the parser configuration
// under optTypographicSubstitutions.
func (o *withTypographicSubstitutions) SetParserOption(c *parser.Config) {
	c.Options[optTypographicSubstitutions] = o.value
}
   111  
// SetTypographerOption replaces the configuration's substitution table with
// the one held by this option.
func (o *withTypographicSubstitutions) SetTypographerOption(p *TypographerConfig) {
	p.Substitutions = o.value
}
   115  
   116  // WithTypographicSubstitutions is a functional otpion that specify replacement text
   117  // for punctuations.
   118  func WithTypographicSubstitutions(values map[TypographicPunctuation][]byte) TypographerOption {
   119  	replacements := newDefaultSubstitutions()
   120  	for k, v := range values {
   121  		replacements[k] = v
   122  	}
   123  
   124  	return &withTypographicSubstitutions{replacements}
   125  }
   126  
// typographerDelimiterProcessor is the delimiter processor handed to
// parser.ScanDelimiter when Parse examines a single or double quote. It has
// no state.
type typographerDelimiterProcessor struct {
}
   129  
   130  func (p *typographerDelimiterProcessor) IsDelimiter(b byte) bool {
   131  	return b == '\'' || b == '"'
   132  }
   133  
// CanOpenCloser reports whether opener and closer use the same delimiter
// character, i.e. a single quote only pairs with a single quote and a double
// quote only with a double quote.
func (p *typographerDelimiterProcessor) CanOpenCloser(opener, closer *parser.Delimiter) bool {
	return opener.Char == closer.Char
}
   137  
// OnMatch always returns nil; this processor does not build a node for a
// matched delimiter pair. The consumes argument is unused.
func (p *typographerDelimiterProcessor) OnMatch(consumes int) gast.Node {
	return nil
}
   141  
// defaultTypographerDelimiterProcessor is the shared processor instance that
// Parse passes to parser.ScanDelimiter.
var defaultTypographerDelimiterProcessor = &typographerDelimiterProcessor{}
   143  
// typographerParser is the inline parser that performs the typographic
// substitutions. It embeds TypographerConfig, so the substitution table is
// reachable as s.Substitutions.
type typographerParser struct {
	TypographerConfig
}
   147  
   148  // NewTypographerParser return a new InlineParser that parses
   149  // typographer expressions.
   150  func NewTypographerParser(opts ...TypographerOption) parser.InlineParser {
   151  	p := &typographerParser{
   152  		TypographerConfig: TypographerConfig{
   153  			Substitutions: newDefaultSubstitutions(),
   154  		},
   155  	}
   156  	for _, o := range opts {
   157  		o.SetTypographerOption(&p.TypographerConfig)
   158  	}
   159  	return p
   160  }
   161  
   162  func (s *typographerParser) Trigger() []byte {
   163  	return []byte{'\'', '"', '-', '.', ',', '<', '>', '*', '['}
   164  }
   165  
   166  func (s *typographerParser) Parse(parent gast.Node, block text.Reader, pc parser.Context) gast.Node {
   167  	line, _ := block.PeekLine()
   168  	c := line[0]
   169  	if len(line) > 2 {
   170  		if c == '-' {
   171  			if s.Substitutions[EmDash] != nil && line[1] == '-' && line[2] == '-' { // ---
   172  				node := gast.NewString(s.Substitutions[EmDash])
   173  				node.SetCode(true)
   174  				block.Advance(3)
   175  				return node
   176  			}
   177  		} else if c == '.' {
   178  			if s.Substitutions[Ellipsis] != nil && line[1] == '.' && line[2] == '.' { // ...
   179  				node := gast.NewString(s.Substitutions[Ellipsis])
   180  				node.SetCode(true)
   181  				block.Advance(3)
   182  				return node
   183  			}
   184  			return nil
   185  		}
   186  	}
   187  	if len(line) > 1 {
   188  		if c == '<' {
   189  			if s.Substitutions[LeftAngleQuote] != nil && line[1] == '<' { // <<
   190  				node := gast.NewString(s.Substitutions[LeftAngleQuote])
   191  				node.SetCode(true)
   192  				block.Advance(2)
   193  				return node
   194  			}
   195  			return nil
   196  		} else if c == '>' {
   197  			if s.Substitutions[RightAngleQuote] != nil && line[1] == '>' { // >>
   198  				node := gast.NewString(s.Substitutions[RightAngleQuote])
   199  				node.SetCode(true)
   200  				block.Advance(2)
   201  				return node
   202  			}
   203  			return nil
   204  		} else if s.Substitutions[EnDash] != nil && c == '-' && line[1] == '-' { // --
   205  			node := gast.NewString(s.Substitutions[EnDash])
   206  			node.SetCode(true)
   207  			block.Advance(2)
   208  			return node
   209  		}
   210  	}
   211  	if c == '\'' || c == '"' {
   212  		before := block.PrecendingCharacter()
   213  		d := parser.ScanDelimiter(line, before, 1, defaultTypographerDelimiterProcessor)
   214  		if d == nil {
   215  			return nil
   216  		}
   217  		counter := getUnclosedCounter(pc)
   218  		if c == '\'' {
   219  			if s.Substitutions[Apostrophe] != nil {
   220  				// Handle decade abbrevations such as '90s
   221  				if d.CanOpen && !d.CanClose && len(line) > 3 && util.IsNumeric(line[1]) && util.IsNumeric(line[2]) && line[3] == 's' {
   222  					after := rune(' ')
   223  					if len(line) > 4 {
   224  						after = util.ToRune(line, 4)
   225  					}
   226  					if len(line) == 3 || util.IsSpaceRune(after) || util.IsPunctRune(after) {
   227  						node := gast.NewString(s.Substitutions[Apostrophe])
   228  						node.SetCode(true)
   229  						block.Advance(1)
   230  						return node
   231  					}
   232  				}
   233  				// special cases: 'twas, 'em, 'net
   234  				if len(line) > 1 && (unicode.IsPunct(before) || unicode.IsSpace(before)) && (line[1] == 't' || line[1] == 'e' || line[1] == 'n' || line[1] == 'l') {
   235  					node := gast.NewString(s.Substitutions[Apostrophe])
   236  					node.SetCode(true)
   237  					block.Advance(1)
   238  					return node
   239  				}
   240  				// Convert normal apostrophes. This is probably more flexible than necessary but
   241  				// converts any apostrophe in between two alphanumerics.
   242  				if len(line) > 1 && (unicode.IsDigit(before) || unicode.IsLetter(before)) && (unicode.IsLetter(util.ToRune(line, 1))) {
   243  					node := gast.NewString(s.Substitutions[Apostrophe])
   244  					node.SetCode(true)
   245  					block.Advance(1)
   246  					return node
   247  				}
   248  			}
   249  			if s.Substitutions[LeftSingleQuote] != nil && d.CanOpen && !d.CanClose {
   250  				nt := LeftSingleQuote
   251  				// special cases: Alice's, I'm, Don't, You'd
   252  				if len(line) > 1 && (line[1] == 's' || line[1] == 'm' || line[1] == 't' || line[1] == 'd') && (len(line) < 3 || util.IsPunct(line[2]) || util.IsSpace(line[2])) {
   253  					nt = RightSingleQuote
   254  				}
   255  				// special cases: I've, I'll, You're
   256  				if len(line) > 2 && ((line[1] == 'v' && line[2] == 'e') || (line[1] == 'l' && line[2] == 'l') || (line[1] == 'r' && line[2] == 'e')) && (len(line) < 4 || util.IsPunct(line[3]) || util.IsSpace(line[3])) {
   257  					nt = RightSingleQuote
   258  				}
   259  				if nt == LeftSingleQuote {
   260  					counter.Single++
   261  				}
   262  
   263  				node := gast.NewString(s.Substitutions[nt])
   264  				node.SetCode(true)
   265  				block.Advance(1)
   266  				return node
   267  			}
   268  			if s.Substitutions[RightSingleQuote] != nil {
   269  				// plural possesives and abbreviations: Smiths', doin'
   270  				if len(line) > 1 && unicode.IsSpace(util.ToRune(line, 0)) || unicode.IsPunct(util.ToRune(line, 0)) && (len(line) > 2 && !unicode.IsDigit(util.ToRune(line, 1))) {
   271  					node := gast.NewString(s.Substitutions[RightSingleQuote])
   272  					node.SetCode(true)
   273  					block.Advance(1)
   274  					return node
   275  				}
   276  			}
   277  			if s.Substitutions[RightSingleQuote] != nil && counter.Single > 0 {
   278  				isClose := d.CanClose && !d.CanOpen
   279  				maybeClose := d.CanClose && d.CanOpen && len(line) > 1 && unicode.IsPunct(util.ToRune(line, 1)) && (len(line) == 2 || (len(line) > 2 && util.IsPunct(line[2]) || util.IsSpace(line[2])))
   280  				if isClose || maybeClose {
   281  					node := gast.NewString(s.Substitutions[RightSingleQuote])
   282  					node.SetCode(true)
   283  					block.Advance(1)
   284  					counter.Single--
   285  					return node
   286  				}
   287  			}
   288  		}
   289  		if c == '"' {
   290  			if s.Substitutions[LeftDoubleQuote] != nil && d.CanOpen && !d.CanClose {
   291  				node := gast.NewString(s.Substitutions[LeftDoubleQuote])
   292  				node.SetCode(true)
   293  				block.Advance(1)
   294  				counter.Double++
   295  				return node
   296  			}
   297  			if s.Substitutions[RightDoubleQuote] != nil && counter.Double > 0 {
   298  				isClose := d.CanClose && !d.CanOpen
   299  				maybeClose := d.CanClose && d.CanOpen && len(line) > 1 && (unicode.IsPunct(util.ToRune(line, 1))) && (len(line) == 2 || (len(line) > 2 && util.IsPunct(line[2]) || util.IsSpace(line[2])))
   300  				if isClose || maybeClose {
   301  					// special case: "Monitor 21""
   302  					if len(line) > 1 && line[1] == '"' && unicode.IsDigit(before) {
   303  						return nil
   304  					}
   305  					node := gast.NewString(s.Substitutions[RightDoubleQuote])
   306  					node.SetCode(true)
   307  					block.Advance(1)
   308  					counter.Double--
   309  					return node
   310  				}
   311  			}
   312  		}
   313  	}
   314  	return nil
   315  }
   316  
// CloseBlock resets the unclosed-quote counters stored in pc, so quotes left
// open before this call are not matched by closing quotes seen afterwards.
// The parent argument is unused.
func (s *typographerParser) CloseBlock(parent gast.Node, pc parser.Context) {
	getUnclosedCounter(pc).Reset()
}
   320  
// typographer is the extension type behind Typographer and NewTypographer.
type typographer struct {
	// options are passed on to NewTypographerParser when the extension is
	// installed by Extend.
	options []TypographerOption
}
   324  
// Typographer is an extension that replaces punctuations with typographic entities.
// It carries no options, so its parser uses the default substitutions.
var Typographer = &typographer{}
   327  
   328  // NewTypographer returns a new Extender that replaces punctuations with typographic entities.
   329  func NewTypographer(opts ...TypographerOption) goldmark.Extender {
   330  	return &typographer{
   331  		options: opts,
   332  	}
   333  }
   334  
   335  func (e *typographer) Extend(m goldmark.Markdown) {
   336  	m.Parser().AddOptions(parser.WithInlineParsers(
   337  		util.Prioritized(NewTypographerParser(e.options...), 9999),
   338  	))
   339  }
   340  

View as plain text