1
2
3
4
5 package parse
6
7 import (
8 "fmt"
9 "strings"
10 "unicode"
11 "unicode/utf8"
12 )
13
14
// item represents a token or text string produced by the lexer.
// The field order matters: emit and errorf build items with positional
// composite literals.
type item struct {
	typ  itemType // The type of this item.
	pos  Pos      // The starting position, in bytes, of this item in the input string.
	val  string   // The text of this item (or the error message for itemError).
	line int      // The line number at the start of this item.
}
21
22 func (i item) String() string {
23 switch {
24 case i.typ == itemEOF:
25 return "EOF"
26 case i.typ == itemError:
27 return i.val
28 case i.typ > itemKeyword:
29 return fmt.Sprintf("<%s>", i.val)
30 case len(i.val) > 10:
31 return fmt.Sprintf("%.10q...", i.val)
32 }
33 return fmt.Sprintf("%q", i.val)
34 }
35
36
// itemType identifies the type of lex items.
type itemType int

// The numeric values come from iota, so the order of this list is significant:
// every keyword must be declared after itemKeyword, because both item.String
// and lexIdentifier recognize keywords with a "> itemKeyword" comparison.
const (
	itemError        itemType = iota // error occurred; value is text of error
	itemBool                         // boolean constant ("true" or "false")
	itemChar                         // printable ASCII character; grab bag for anything not otherwise recognized
	itemCharConstant                 // character constant
	itemComment                      // comment text
	itemComplex                      // complex constant (1+2i); imaginary is just a number
	itemAssign                       // equals ('=') introducing an assignment
	itemDeclare                      // colon-equals (':=') introducing a declaration
	itemEOF                          // end of input
	itemField                        // alphanumeric identifier starting with '.'
	itemIdentifier                   // alphanumeric identifier not starting with '.'
	itemLeftDelim                    // left action delimiter
	itemLeftParen                    // '(' inside action
	itemNumber                       // simple number, including imaginary
	itemPipe                         // pipe symbol
	itemRawString                    // raw quoted string (includes quotes)
	itemRightDelim                   // right action delimiter
	itemRightParen                   // ')' inside action
	itemSpace                        // run of spaces separating arguments
	itemString                       // quoted string (includes quotes)
	itemText                         // plain text
	itemVariable                     // variable starting with '$', such as '$' or '$1' or '$hello'

	// Keywords appear after all the rest.
	itemKeyword  // used only to delimit the keywords
	itemBlock    // block keyword
	itemBreak    // break keyword
	itemContinue // continue keyword
	itemDot      // the cursor, spelled '.'
	itemDefine   // define keyword
	itemElse     // else keyword
	itemEnd      // end keyword
	itemIf       // if keyword
	itemNil      // the untyped nil constant, easiest to treat as a keyword
	itemRange    // range keyword
	itemTemplate // template keyword
	itemWith     // with keyword
)
77
// key maps the spelling of each keyword to its item type. lexIdentifier looks
// words up here; a word that is absent yields the zero value (itemError), which
// is not greater than itemKeyword and is therefore not treated as a keyword.
var key = map[string]itemType{
	".":        itemDot,
	"block":    itemBlock,
	"break":    itemBreak,
	"continue": itemContinue,
	"define":   itemDefine,
	"else":     itemElse,
	"end":      itemEnd,
	"if":       itemIf,
	"range":    itemRange,
	"nil":      itemNil,
	"template": itemTemplate,
	"with":     itemWith,
}
92
// eof is the sentinel returned by next when the input is exhausted. It is
// negative, so it can never collide with a decoded rune.
const eof = -1
94
95
96
97
98
99
100
101
102
// Trimming spaces.
// If the text after the left delimiter starts with the trim marker followed by
// a space character (e.g. "{{- "), lexText trims the white space at the end of
// the preceding text. If the right delimiter is preceded by a space character
// and the trim marker (e.g. " -}}"), the white space following the delimiter is
// skipped. The space character next to the marker is required; see
// hasLeftTrimMarker and hasRightTrimMarker.
const (
	spaceChars    = " \t\r\n"  // The characters treated as white space when trimming.
	trimMarker    = '-'        // Attached to a left/right delimiter, trims spaces from the preceding/following text.
	trimMarkerLen = Pos(1 + 1) // Marker plus the space before or after it.
)
108
109
// stateFn represents the state of the scanner as a function that returns the
// next state. A nil stateFn stops the loop in run.
type stateFn func(*lexer) stateFn
111
112
// lexer holds the state of the scanner.
type lexer struct {
	name        string    // the name of the input, as passed to lex
	input       string    // the string being scanned
	leftDelim   string    // start of action
	rightDelim  string    // end of action
	emitComment bool      // emit itemComment tokens instead of discarding comments
	pos         Pos       // current position in the input
	start       Pos       // start position of the item being scanned
	atEOF       bool      // next has hit the end of input and returned eof; disables backup
	items       chan item // unbuffered channel of scanned items
	parenDepth  int       // nesting depth of ( ) exprs inside the current action
	line        int       // 1+number of newlines seen
	startLine   int       // line on which the item being scanned starts
	breakOK     bool      // break keyword allowed; otherwise "break" lexes as an identifier
	continueOK  bool      // continue keyword allowed; otherwise "continue" lexes as an identifier
}
129
130
131 func (l *lexer) next() rune {
132 if int(l.pos) >= len(l.input) {
133 l.atEOF = true
134 return eof
135 }
136 r, w := utf8.DecodeRuneInString(l.input[l.pos:])
137 l.pos += Pos(w)
138 if r == '\n' {
139 l.line++
140 }
141 return r
142 }
143
144
145 func (l *lexer) peek() rune {
146 r := l.next()
147 l.backup()
148 return r
149 }
150
151
152 func (l *lexer) backup() {
153 if !l.atEOF && l.pos > 0 {
154 r, w := utf8.DecodeLastRuneInString(l.input[:l.pos])
155 l.pos -= Pos(w)
156
157 if r == '\n' {
158 l.line--
159 }
160 }
161 }
162
163
164 func (l *lexer) emit(t itemType) {
165 l.items <- item{t, l.start, l.input[l.start:l.pos], l.startLine}
166 l.start = l.pos
167 l.startLine = l.line
168 }
169
170
171 func (l *lexer) ignore() {
172 l.line += strings.Count(l.input[l.start:l.pos], "\n")
173 l.start = l.pos
174 l.startLine = l.line
175 }
176
177
178 func (l *lexer) accept(valid string) bool {
179 if strings.ContainsRune(valid, l.next()) {
180 return true
181 }
182 l.backup()
183 return false
184 }
185
186
187 func (l *lexer) acceptRun(valid string) {
188 for strings.ContainsRune(valid, l.next()) {
189 }
190 l.backup()
191 }
192
193
194
195 func (l *lexer) errorf(format string, args ...any) stateFn {
196 l.items <- item{itemError, l.start, fmt.Sprintf(format, args...), l.startLine}
197 return nil
198 }
199
200
201
202 func (l *lexer) nextItem() item {
203 return <-l.items
204 }
205
206
207
208 func (l *lexer) drain() {
209 for range l.items {
210 }
211 }
212
213
214 func lex(name, input, left, right string, emitComment, breakOK, continueOK bool) *lexer {
215 if left == "" {
216 left = leftDelim
217 }
218 if right == "" {
219 right = rightDelim
220 }
221 l := &lexer{
222 name: name,
223 input: input,
224 leftDelim: left,
225 rightDelim: right,
226 emitComment: emitComment,
227 breakOK: breakOK,
228 continueOK: continueOK,
229 items: make(chan item),
230 line: 1,
231 startLine: 1,
232 }
233 go l.run()
234 return l
235 }
236
237
238 func (l *lexer) run() {
239 for state := lexText; state != nil; {
240 state = state(l)
241 }
242 close(l.items)
243 }
244
245
246
// Default action delimiters (used by lex when the caller passes empty strings)
// and the fixed comment markers.
const (
	leftDelim    = "{{" // default start of an action
	rightDelim   = "}}" // default end of an action
	leftComment  = "/*" // start of a comment inside an action
	rightComment = "*/" // end of a comment inside an action
)
253
254
255 func lexText(l *lexer) stateFn {
256 if x := strings.Index(l.input[l.pos:], l.leftDelim); x >= 0 {
257 ldn := Pos(len(l.leftDelim))
258 l.pos += Pos(x)
259 trimLength := Pos(0)
260 if hasLeftTrimMarker(l.input[l.pos+ldn:]) {
261 trimLength = rightTrimLength(l.input[l.start:l.pos])
262 }
263 l.pos -= trimLength
264 if l.pos > l.start {
265 l.line += strings.Count(l.input[l.start:l.pos], "\n")
266 l.emit(itemText)
267 }
268 l.pos += trimLength
269 l.ignore()
270 return lexLeftDelim
271 }
272 l.pos = Pos(len(l.input))
273
274 if l.pos > l.start {
275 l.line += strings.Count(l.input[l.start:l.pos], "\n")
276 l.emit(itemText)
277 }
278 l.emit(itemEOF)
279 return nil
280 }
281
282
283 func rightTrimLength(s string) Pos {
284 return Pos(len(s) - len(strings.TrimRight(s, spaceChars)))
285 }
286
287
288 func (l *lexer) atRightDelim() (delim, trimSpaces bool) {
289 if hasRightTrimMarker(l.input[l.pos:]) && strings.HasPrefix(l.input[l.pos+trimMarkerLen:], l.rightDelim) {
290 return true, true
291 }
292 if strings.HasPrefix(l.input[l.pos:], l.rightDelim) {
293 return true, false
294 }
295 return false, false
296 }
297
298
299 func leftTrimLength(s string) Pos {
300 return Pos(len(s) - len(strings.TrimLeft(s, spaceChars)))
301 }
302
303
304 func lexLeftDelim(l *lexer) stateFn {
305 l.pos += Pos(len(l.leftDelim))
306 trimSpace := hasLeftTrimMarker(l.input[l.pos:])
307 afterMarker := Pos(0)
308 if trimSpace {
309 afterMarker = trimMarkerLen
310 }
311 if strings.HasPrefix(l.input[l.pos+afterMarker:], leftComment) {
312 l.pos += afterMarker
313 l.ignore()
314 return lexComment
315 }
316 l.emit(itemLeftDelim)
317 l.pos += afterMarker
318 l.ignore()
319 l.parenDepth = 0
320 return lexInsideAction
321 }
322
323
324 func lexComment(l *lexer) stateFn {
325 l.pos += Pos(len(leftComment))
326 i := strings.Index(l.input[l.pos:], rightComment)
327 if i < 0 {
328 return l.errorf("unclosed comment")
329 }
330 l.pos += Pos(i + len(rightComment))
331 delim, trimSpace := l.atRightDelim()
332 if !delim {
333 return l.errorf("comment ends before closing delimiter")
334 }
335 if l.emitComment {
336 l.emit(itemComment)
337 }
338 if trimSpace {
339 l.pos += trimMarkerLen
340 }
341 l.pos += Pos(len(l.rightDelim))
342 if trimSpace {
343 l.pos += leftTrimLength(l.input[l.pos:])
344 }
345 l.ignore()
346 return lexText
347 }
348
349
350 func lexRightDelim(l *lexer) stateFn {
351 trimSpace := hasRightTrimMarker(l.input[l.pos:])
352 if trimSpace {
353 l.pos += trimMarkerLen
354 l.ignore()
355 }
356 l.pos += Pos(len(l.rightDelim))
357 l.emit(itemRightDelim)
358 if trimSpace {
359 l.pos += leftTrimLength(l.input[l.pos:])
360 l.ignore()
361 }
362 return lexText
363 }
364
365
// lexInsideAction scans the elements inside action delimiters. It handles one
// element per call: single-character tokens are emitted here and the function
// returns itself as the next state, while multi-character tokens are handed to
// a dedicated state function.
func lexInsideAction(l *lexer) stateFn {
	// Either number, quoted string, or identifier.
	// Spaces separate arguments; runs of spaces turn into itemSpace.
	// Pipe symbols separate and are emitted.
	delim, _ := l.atRightDelim()
	if delim {
		// The action may only be closed when all parentheses are balanced.
		if l.parenDepth == 0 {
			return lexRightDelim
		}
		return l.errorf("unclosed left paren")
	}
	switch r := l.next(); {
	case r == eof:
		return l.errorf("unclosed action")
	case isSpace(r):
		l.backup() // Put the space back so lexSpace can detect " -}}".
		return lexSpace
	case r == '=':
		l.emit(itemAssign)
	case r == ':':
		// A colon is only valid as the first half of ":=".
		if l.next() != '=' {
			return l.errorf("expected :=")
		}
		l.emit(itemDeclare)
	case r == '|':
		l.emit(itemPipe)
	case r == '"':
		return lexQuote
	case r == '`':
		return lexRawQuote
	case r == '$':
		return lexVariable
	case r == '\'':
		return lexChar
	case r == '.':
		// Special look-ahead for ".field" so we don't break l.backup().
		// The inner r shadows the rune with the next input byte.
		if l.pos < Pos(len(l.input)) {
			r := l.input[l.pos]
			if r < '0' || '9' < r {
				return lexField
			}
		}
		// A '.' followed by a digit (or at the very end of the input) is
		// handed to the number scanner.
		fallthrough
	case r == '+' || r == '-' || ('0' <= r && r <= '9'):
		l.backup()
		return lexNumber
	case isAlphaNumeric(r):
		l.backup()
		return lexIdentifier
	case r == '(':
		l.emit(itemLeftParen)
		l.parenDepth++
	case r == ')':
		// The paren is emitted before the depth check, so an unbalanced ')'
		// yields itemRightParen followed by the error item.
		l.emit(itemRightParen)
		l.parenDepth--
		if l.parenDepth < 0 {
			return l.errorf("unexpected right paren %#U", r)
		}
	case r <= unicode.MaxASCII && unicode.IsPrint(r):
		// Any other printable ASCII character is passed through as itemChar.
		l.emit(itemChar)
	default:
		return l.errorf("unrecognized character in action: %#U", r)
	}
	return lexInsideAction
}
431
432
433
434
435 func lexSpace(l *lexer) stateFn {
436 var r rune
437 var numSpaces int
438 for {
439 r = l.peek()
440 if !isSpace(r) {
441 break
442 }
443 l.next()
444 numSpaces++
445 }
446
447
448 if hasRightTrimMarker(l.input[l.pos-1:]) && strings.HasPrefix(l.input[l.pos-1+trimMarkerLen:], l.rightDelim) {
449 l.backup()
450 if numSpaces == 1 {
451 return lexRightDelim
452 }
453 }
454 l.emit(itemSpace)
455 return lexInsideAction
456 }
457
458
459 func lexIdentifier(l *lexer) stateFn {
460 Loop:
461 for {
462 switch r := l.next(); {
463 case isAlphaNumeric(r):
464
465 default:
466 l.backup()
467 word := l.input[l.start:l.pos]
468 if !l.atTerminator() {
469 return l.errorf("bad character %#U", r)
470 }
471 switch {
472 case key[word] > itemKeyword:
473 item := key[word]
474 if item == itemBreak && !l.breakOK || item == itemContinue && !l.continueOK {
475 l.emit(itemIdentifier)
476 } else {
477 l.emit(item)
478 }
479 case word[0] == '.':
480 l.emit(itemField)
481 case word == "true", word == "false":
482 l.emit(itemBool)
483 default:
484 l.emit(itemIdentifier)
485 }
486 break Loop
487 }
488 }
489 return lexInsideAction
490 }
491
492
493
494 func lexField(l *lexer) stateFn {
495 return lexFieldOrVariable(l, itemField)
496 }
497
498
499
500 func lexVariable(l *lexer) stateFn {
501 if l.atTerminator() {
502 l.emit(itemVariable)
503 return lexInsideAction
504 }
505 return lexFieldOrVariable(l, itemVariable)
506 }
507
508
509
510 func lexFieldOrVariable(l *lexer, typ itemType) stateFn {
511 if l.atTerminator() {
512 if typ == itemVariable {
513 l.emit(itemVariable)
514 } else {
515 l.emit(itemDot)
516 }
517 return lexInsideAction
518 }
519 var r rune
520 for {
521 r = l.next()
522 if !isAlphaNumeric(r) {
523 l.backup()
524 break
525 }
526 }
527 if !l.atTerminator() {
528 return l.errorf("bad character %#U", r)
529 }
530 l.emit(typ)
531 return lexInsideAction
532 }
533
534
535
536
537
538 func (l *lexer) atTerminator() bool {
539 r := l.peek()
540 if isSpace(r) {
541 return true
542 }
543 switch r {
544 case eof, '.', ',', '|', ':', ')', '(':
545 return true
546 }
547 return strings.HasPrefix(l.input[l.pos:], l.rightDelim)
548 }
549
550
551
552 func lexChar(l *lexer) stateFn {
553 Loop:
554 for {
555 switch l.next() {
556 case '\\':
557 if r := l.next(); r != eof && r != '\n' {
558 break
559 }
560 fallthrough
561 case eof, '\n':
562 return l.errorf("unterminated character constant")
563 case '\'':
564 break Loop
565 }
566 }
567 l.emit(itemCharConstant)
568 return lexInsideAction
569 }
570
571
572
573
574
575 func lexNumber(l *lexer) stateFn {
576 if !l.scanNumber() {
577 return l.errorf("bad number syntax: %q", l.input[l.start:l.pos])
578 }
579 if sign := l.peek(); sign == '+' || sign == '-' {
580
581 if !l.scanNumber() || l.input[l.pos-1] != 'i' {
582 return l.errorf("bad number syntax: %q", l.input[l.start:l.pos])
583 }
584 l.emit(itemComplex)
585 } else {
586 l.emit(itemNumber)
587 }
588 return lexInsideAction
589 }
590
591 func (l *lexer) scanNumber() bool {
592
593 l.accept("+-")
594
595 digits := "0123456789_"
596 if l.accept("0") {
597
598 if l.accept("xX") {
599 digits = "0123456789abcdefABCDEF_"
600 } else if l.accept("oO") {
601 digits = "01234567_"
602 } else if l.accept("bB") {
603 digits = "01_"
604 }
605 }
606 l.acceptRun(digits)
607 if l.accept(".") {
608 l.acceptRun(digits)
609 }
610 if len(digits) == 10+1 && l.accept("eE") {
611 l.accept("+-")
612 l.acceptRun("0123456789_")
613 }
614 if len(digits) == 16+6+1 && l.accept("pP") {
615 l.accept("+-")
616 l.acceptRun("0123456789_")
617 }
618
619 l.accept("i")
620
621 if isAlphaNumeric(l.peek()) {
622 l.next()
623 return false
624 }
625 return true
626 }
627
628
629 func lexQuote(l *lexer) stateFn {
630 Loop:
631 for {
632 switch l.next() {
633 case '\\':
634 if r := l.next(); r != eof && r != '\n' {
635 break
636 }
637 fallthrough
638 case eof, '\n':
639 return l.errorf("unterminated quoted string")
640 case '"':
641 break Loop
642 }
643 }
644 l.emit(itemString)
645 return lexInsideAction
646 }
647
648
649 func lexRawQuote(l *lexer) stateFn {
650 Loop:
651 for {
652 switch l.next() {
653 case eof:
654 return l.errorf("unterminated raw quoted string")
655 case '`':
656 break Loop
657 }
658 }
659 l.emit(itemRawString)
660 return lexInsideAction
661 }
662
663
// isSpace reports whether r is a space character: space, tab, carriage return
// or newline.
func isSpace(r rune) bool {
	switch r {
	case ' ', '\t', '\r', '\n':
		return true
	}
	return false
}
667
668
// isAlphaNumeric reports whether r is an underscore, a Unicode letter or a
// Unicode digit.
func isAlphaNumeric(r rune) bool {
	if r == '_' {
		return true
	}
	return unicode.IsLetter(r) || unicode.IsDigit(r)
}
672
673 func hasLeftTrimMarker(s string) bool {
674 return len(s) >= 2 && s[0] == trimMarker && isSpace(rune(s[1]))
675 }
676
677 func hasRightTrimMarker(s string) bool {
678 return len(s) >= 2 && isSpace(rune(s[0])) && s[1] == trimMarker
679 }
680
View as plain text