1
2
3
4
5 package syntax
6
7 import (
8 "sort"
9 "strings"
10 "unicode"
11 "unicode/utf8"
12 )
13
14
15
16 type Error struct {
17 Code ErrorCode
18 Expr string
19 }
20
21 func (e *Error) Error() string {
22 return "error parsing regexp: " + e.Code.String() + ": `" + e.Expr + "`"
23 }
24
25
26 type ErrorCode string
27
28 const (
29
30 ErrInternalError ErrorCode = "regexp/syntax: internal error"
31
32
33 ErrInvalidCharClass ErrorCode = "invalid character class"
34 ErrInvalidCharRange ErrorCode = "invalid character class range"
35 ErrInvalidEscape ErrorCode = "invalid escape sequence"
36 ErrInvalidNamedCapture ErrorCode = "invalid named capture"
37 ErrInvalidPerlOp ErrorCode = "invalid or unsupported Perl syntax"
38 ErrInvalidRepeatOp ErrorCode = "invalid nested repetition operator"
39 ErrInvalidRepeatSize ErrorCode = "invalid repeat count"
40 ErrInvalidUTF8 ErrorCode = "invalid UTF-8"
41 ErrMissingBracket ErrorCode = "missing closing ]"
42 ErrMissingParen ErrorCode = "missing closing )"
43 ErrMissingRepeatArgument ErrorCode = "missing argument to repetition operator"
44 ErrTrailingBackslash ErrorCode = "trailing backslash at end of expression"
45 ErrUnexpectedParen ErrorCode = "unexpected )"
46 ErrNestingDepth ErrorCode = "expression nests too deeply"
47 )
48
49 func (e ErrorCode) String() string {
50 return string(e)
51 }
52
53
54 type Flags uint16
55
56 const (
57 FoldCase Flags = 1 << iota
58 Literal
59 ClassNL
60 DotNL
61 OneLine
62 NonGreedy
63 PerlX
64 UnicodeGroups
65 WasDollar
66 Simple
67
68 MatchNL = ClassNL | DotNL
69
70 Perl = ClassNL | OneLine | PerlX | UnicodeGroups
71 POSIX Flags = 0
72 )
73
74
75 const (
76 opLeftParen = opPseudo + iota
77 opVerticalBar
78 )
79
80
81
82
83
84
85
86
87
88
89
90
91
92 const maxHeight = 1000
93
94
95
96
97
98
99
100 const (
101 maxSize = 128 << 20 / instSize
102 instSize = 5 * 8
103 )
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120 const (
121 maxRunes = 128 << 20 / runeSize
122 runeSize = 4
123 )
124
125 type parser struct {
126 flags Flags
127 stack []*Regexp
128 free *Regexp
129 numCap int
130 wholeRegexp string
131 tmpClass []rune
132 numRegexp int
133 numRunes int
134 repeats int64
135 height map[*Regexp]int
136 size map[*Regexp]int64
137 }
138
139 func (p *parser) newRegexp(op Op) *Regexp {
140 re := p.free
141 if re != nil {
142 p.free = re.Sub0[0]
143 *re = Regexp{}
144 } else {
145 re = new(Regexp)
146 p.numRegexp++
147 }
148 re.Op = op
149 return re
150 }
151
152 func (p *parser) reuse(re *Regexp) {
153 if p.height != nil {
154 delete(p.height, re)
155 }
156 re.Sub0[0] = p.free
157 p.free = re
158 }
159
160 func (p *parser) checkLimits(re *Regexp) {
161 if p.numRunes > maxRunes {
162 panic(ErrInternalError)
163 }
164 p.checkSize(re)
165 p.checkHeight(re)
166 }
167
168 func (p *parser) checkSize(re *Regexp) {
169 if p.size == nil {
170
171
172
173
174
175 if p.repeats == 0 {
176 p.repeats = 1
177 }
178 if re.Op == OpRepeat {
179 n := re.Max
180 if n == -1 {
181 n = re.Min
182 }
183 if n <= 0 {
184 n = 1
185 }
186 if int64(n) > maxSize/p.repeats {
187 p.repeats = maxSize
188 } else {
189 p.repeats *= int64(n)
190 }
191 }
192 if int64(p.numRegexp) < maxSize/p.repeats {
193 return
194 }
195
196
197
198
199 p.size = make(map[*Regexp]int64)
200 for _, re := range p.stack {
201 p.checkSize(re)
202 }
203 }
204
205 if p.calcSize(re, true) > maxSize {
206 panic(ErrInternalError)
207 }
208 }
209
210 func (p *parser) calcSize(re *Regexp, force bool) int64 {
211 if !force {
212 if size, ok := p.size[re]; ok {
213 return size
214 }
215 }
216
217 var size int64
218 switch re.Op {
219 case OpLiteral:
220 size = int64(len(re.Rune))
221 case OpCapture, OpStar:
222
223 size = 2 + p.calcSize(re.Sub[0], false)
224 case OpPlus, OpQuest:
225 size = 1 + p.calcSize(re.Sub[0], false)
226 case OpConcat:
227 for _, sub := range re.Sub {
228 size += p.calcSize(sub, false)
229 }
230 case OpAlternate:
231 for _, sub := range re.Sub {
232 size += p.calcSize(sub, false)
233 }
234 if len(re.Sub) > 1 {
235 size += int64(len(re.Sub)) - 1
236 }
237 case OpRepeat:
238 sub := p.calcSize(re.Sub[0], false)
239 if re.Max == -1 {
240 if re.Min == 0 {
241 size = 2 + sub
242 } else {
243 size = 1 + int64(re.Min)*sub
244 }
245 break
246 }
247
248 size = int64(re.Max)*sub + int64(re.Max-re.Min)
249 }
250
251 if size < 1 {
252 size = 1
253 }
254 p.size[re] = size
255 return size
256 }
257
258 func (p *parser) checkHeight(re *Regexp) {
259 if p.numRegexp < maxHeight {
260 return
261 }
262 if p.height == nil {
263 p.height = make(map[*Regexp]int)
264 for _, re := range p.stack {
265 p.checkHeight(re)
266 }
267 }
268 if p.calcHeight(re, true) > maxHeight {
269 panic(ErrNestingDepth)
270 }
271 }
272
273 func (p *parser) calcHeight(re *Regexp, force bool) int {
274 if !force {
275 if h, ok := p.height[re]; ok {
276 return h
277 }
278 }
279 h := 1
280 for _, sub := range re.Sub {
281 hsub := p.calcHeight(sub, false)
282 if h < 1+hsub {
283 h = 1 + hsub
284 }
285 }
286 p.height[re] = h
287 return h
288 }
289
290
291
292
293 func (p *parser) push(re *Regexp) *Regexp {
294 p.numRunes += len(re.Rune)
295 if re.Op == OpCharClass && len(re.Rune) == 2 && re.Rune[0] == re.Rune[1] {
296
297 if p.maybeConcat(re.Rune[0], p.flags&^FoldCase) {
298 return nil
299 }
300 re.Op = OpLiteral
301 re.Rune = re.Rune[:1]
302 re.Flags = p.flags &^ FoldCase
303 } else if re.Op == OpCharClass && len(re.Rune) == 4 &&
304 re.Rune[0] == re.Rune[1] && re.Rune[2] == re.Rune[3] &&
305 unicode.SimpleFold(re.Rune[0]) == re.Rune[2] &&
306 unicode.SimpleFold(re.Rune[2]) == re.Rune[0] ||
307 re.Op == OpCharClass && len(re.Rune) == 2 &&
308 re.Rune[0]+1 == re.Rune[1] &&
309 unicode.SimpleFold(re.Rune[0]) == re.Rune[1] &&
310 unicode.SimpleFold(re.Rune[1]) == re.Rune[0] {
311
312 if p.maybeConcat(re.Rune[0], p.flags|FoldCase) {
313 return nil
314 }
315
316
317 re.Op = OpLiteral
318 re.Rune = re.Rune[:1]
319 re.Flags = p.flags | FoldCase
320 } else {
321
322 p.maybeConcat(-1, 0)
323 }
324
325 p.stack = append(p.stack, re)
326 p.checkLimits(re)
327 return re
328 }
329
330
331
332
333
334
335
336
337
338
339 func (p *parser) maybeConcat(r rune, flags Flags) bool {
340 n := len(p.stack)
341 if n < 2 {
342 return false
343 }
344
345 re1 := p.stack[n-1]
346 re2 := p.stack[n-2]
347 if re1.Op != OpLiteral || re2.Op != OpLiteral || re1.Flags&FoldCase != re2.Flags&FoldCase {
348 return false
349 }
350
351
352 re2.Rune = append(re2.Rune, re1.Rune...)
353
354
355 if r >= 0 {
356 re1.Rune = re1.Rune0[:1]
357 re1.Rune[0] = r
358 re1.Flags = flags
359 return true
360 }
361
362 p.stack = p.stack[:n-1]
363 p.reuse(re1)
364 return false
365 }
366
367
368 func (p *parser) literal(r rune) {
369 re := p.newRegexp(OpLiteral)
370 re.Flags = p.flags
371 if p.flags&FoldCase != 0 {
372 r = minFoldRune(r)
373 }
374 re.Rune0[0] = r
375 re.Rune = re.Rune0[:1]
376 p.push(re)
377 }
378
379
380 func minFoldRune(r rune) rune {
381 if r < minFold || r > maxFold {
382 return r
383 }
384 min := r
385 r0 := r
386 for r = unicode.SimpleFold(r); r != r0; r = unicode.SimpleFold(r) {
387 if min > r {
388 min = r
389 }
390 }
391 return min
392 }
393
394
395
396 func (p *parser) op(op Op) *Regexp {
397 re := p.newRegexp(op)
398 re.Flags = p.flags
399 return p.push(re)
400 }
401
402
403
404
405
406 func (p *parser) repeat(op Op, min, max int, before, after, lastRepeat string) (string, error) {
407 flags := p.flags
408 if p.flags&PerlX != 0 {
409 if len(after) > 0 && after[0] == '?' {
410 after = after[1:]
411 flags ^= NonGreedy
412 }
413 if lastRepeat != "" {
414
415
416
417 return "", &Error{ErrInvalidRepeatOp, lastRepeat[:len(lastRepeat)-len(after)]}
418 }
419 }
420 n := len(p.stack)
421 if n == 0 {
422 return "", &Error{ErrMissingRepeatArgument, before[:len(before)-len(after)]}
423 }
424 sub := p.stack[n-1]
425 if sub.Op >= opPseudo {
426 return "", &Error{ErrMissingRepeatArgument, before[:len(before)-len(after)]}
427 }
428
429 re := p.newRegexp(op)
430 re.Min = min
431 re.Max = max
432 re.Flags = flags
433 re.Sub = re.Sub0[:1]
434 re.Sub[0] = sub
435 p.stack[n-1] = re
436 p.checkLimits(re)
437
438 if op == OpRepeat && (min >= 2 || max >= 2) && !repeatIsValid(re, 1000) {
439 return "", &Error{ErrInvalidRepeatSize, before[:len(before)-len(after)]}
440 }
441
442 return after, nil
443 }
444
445
446
447
448
449
450
451
452
453
454 func repeatIsValid(re *Regexp, n int) bool {
455 if re.Op == OpRepeat {
456 m := re.Max
457 if m == 0 {
458 return true
459 }
460 if m < 0 {
461 m = re.Min
462 }
463 if m > n {
464 return false
465 }
466 if m > 0 {
467 n /= m
468 }
469 }
470 for _, sub := range re.Sub {
471 if !repeatIsValid(sub, n) {
472 return false
473 }
474 }
475 return true
476 }
477
478
479 func (p *parser) concat() *Regexp {
480 p.maybeConcat(-1, 0)
481
482
483 i := len(p.stack)
484 for i > 0 && p.stack[i-1].Op < opPseudo {
485 i--
486 }
487 subs := p.stack[i:]
488 p.stack = p.stack[:i]
489
490
491 if len(subs) == 0 {
492 return p.push(p.newRegexp(OpEmptyMatch))
493 }
494
495 return p.push(p.collapse(subs, OpConcat))
496 }
497
498
499 func (p *parser) alternate() *Regexp {
500
501
502 i := len(p.stack)
503 for i > 0 && p.stack[i-1].Op < opPseudo {
504 i--
505 }
506 subs := p.stack[i:]
507 p.stack = p.stack[:i]
508
509
510
511 if len(subs) > 0 {
512 cleanAlt(subs[len(subs)-1])
513 }
514
515
516
517 if len(subs) == 0 {
518 return p.push(p.newRegexp(OpNoMatch))
519 }
520
521 return p.push(p.collapse(subs, OpAlternate))
522 }
523
524
525 func cleanAlt(re *Regexp) {
526 switch re.Op {
527 case OpCharClass:
528 re.Rune = cleanClass(&re.Rune)
529 if len(re.Rune) == 2 && re.Rune[0] == 0 && re.Rune[1] == unicode.MaxRune {
530 re.Rune = nil
531 re.Op = OpAnyChar
532 return
533 }
534 if len(re.Rune) == 4 && re.Rune[0] == 0 && re.Rune[1] == '\n'-1 && re.Rune[2] == '\n'+1 && re.Rune[3] == unicode.MaxRune {
535 re.Rune = nil
536 re.Op = OpAnyCharNotNL
537 return
538 }
539 if cap(re.Rune)-len(re.Rune) > 100 {
540
541
542 re.Rune = append(re.Rune0[:0], re.Rune...)
543 }
544 }
545 }
546
547
548
549
550
551 func (p *parser) collapse(subs []*Regexp, op Op) *Regexp {
552 if len(subs) == 1 {
553 return subs[0]
554 }
555 re := p.newRegexp(op)
556 re.Sub = re.Sub0[:0]
557 for _, sub := range subs {
558 if sub.Op == op {
559 re.Sub = append(re.Sub, sub.Sub...)
560 p.reuse(sub)
561 } else {
562 re.Sub = append(re.Sub, sub)
563 }
564 }
565 if op == OpAlternate {
566 re.Sub = p.factor(re.Sub)
567 if len(re.Sub) == 1 {
568 old := re
569 re = re.Sub[0]
570 p.reuse(old)
571 }
572 }
573 return re
574 }
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591 func (p *parser) factor(sub []*Regexp) []*Regexp {
592 if len(sub) < 2 {
593 return sub
594 }
595
596
597 var str []rune
598 var strflags Flags
599 start := 0
600 out := sub[:0]
601 for i := 0; i <= len(sub); i++ {
602
603
604
605
606
607
608 var istr []rune
609 var iflags Flags
610 if i < len(sub) {
611 istr, iflags = p.leadingString(sub[i])
612 if iflags == strflags {
613 same := 0
614 for same < len(str) && same < len(istr) && str[same] == istr[same] {
615 same++
616 }
617 if same > 0 {
618
619
620 str = str[:same]
621 continue
622 }
623 }
624 }
625
626
627
628
629
630
631 if i == start {
632
633 } else if i == start+1 {
634
635 out = append(out, sub[start])
636 } else {
637
638 prefix := p.newRegexp(OpLiteral)
639 prefix.Flags = strflags
640 prefix.Rune = append(prefix.Rune[:0], str...)
641
642 for j := start; j < i; j++ {
643 sub[j] = p.removeLeadingString(sub[j], len(str))
644 p.checkLimits(sub[j])
645 }
646 suffix := p.collapse(sub[start:i], OpAlternate)
647
648 re := p.newRegexp(OpConcat)
649 re.Sub = append(re.Sub[:0], prefix, suffix)
650 out = append(out, re)
651 }
652
653
654 start = i
655 str = istr
656 strflags = iflags
657 }
658 sub = out
659
660
661
662
663
664
665
666
667
668 start = 0
669 out = sub[:0]
670 var first *Regexp
671 for i := 0; i <= len(sub); i++ {
672
673
674
675
676
677 var ifirst *Regexp
678 if i < len(sub) {
679 ifirst = p.leadingRegexp(sub[i])
680 if first != nil && first.Equal(ifirst) &&
681
682 (isCharClass(first) || (first.Op == OpRepeat && first.Min == first.Max && isCharClass(first.Sub[0]))) {
683 continue
684 }
685 }
686
687
688
689
690
691 if i == start {
692
693 } else if i == start+1 {
694
695 out = append(out, sub[start])
696 } else {
697
698 prefix := first
699 for j := start; j < i; j++ {
700 reuse := j != start
701 sub[j] = p.removeLeadingRegexp(sub[j], reuse)
702 p.checkLimits(sub[j])
703 }
704 suffix := p.collapse(sub[start:i], OpAlternate)
705
706 re := p.newRegexp(OpConcat)
707 re.Sub = append(re.Sub[:0], prefix, suffix)
708 out = append(out, re)
709 }
710
711
712 start = i
713 first = ifirst
714 }
715 sub = out
716
717
718 start = 0
719 out = sub[:0]
720 for i := 0; i <= len(sub); i++ {
721
722
723
724
725
726
727 if i < len(sub) && isCharClass(sub[i]) {
728 continue
729 }
730
731
732
733 if i == start {
734
735 } else if i == start+1 {
736 out = append(out, sub[start])
737 } else {
738
739
740 max := start
741 for j := start + 1; j < i; j++ {
742 if sub[max].Op < sub[j].Op || sub[max].Op == sub[j].Op && len(sub[max].Rune) < len(sub[j].Rune) {
743 max = j
744 }
745 }
746 sub[start], sub[max] = sub[max], sub[start]
747
748 for j := start + 1; j < i; j++ {
749 mergeCharClass(sub[start], sub[j])
750 p.reuse(sub[j])
751 }
752 cleanAlt(sub[start])
753 out = append(out, sub[start])
754 }
755
756
757 if i < len(sub) {
758 out = append(out, sub[i])
759 }
760 start = i + 1
761 }
762 sub = out
763
764
765 start = 0
766 out = sub[:0]
767 for i := range sub {
768 if i+1 < len(sub) && sub[i].Op == OpEmptyMatch && sub[i+1].Op == OpEmptyMatch {
769 continue
770 }
771 out = append(out, sub[i])
772 }
773 sub = out
774
775 return sub
776 }
777
778
779
780 func (p *parser) leadingString(re *Regexp) ([]rune, Flags) {
781 if re.Op == OpConcat && len(re.Sub) > 0 {
782 re = re.Sub[0]
783 }
784 if re.Op != OpLiteral {
785 return nil, 0
786 }
787 return re.Rune, re.Flags & FoldCase
788 }
789
790
791
792 func (p *parser) removeLeadingString(re *Regexp, n int) *Regexp {
793 if re.Op == OpConcat && len(re.Sub) > 0 {
794
795
796 sub := re.Sub[0]
797 sub = p.removeLeadingString(sub, n)
798 re.Sub[0] = sub
799 if sub.Op == OpEmptyMatch {
800 p.reuse(sub)
801 switch len(re.Sub) {
802 case 0, 1:
803
804 re.Op = OpEmptyMatch
805 re.Sub = nil
806 case 2:
807 old := re
808 re = re.Sub[1]
809 p.reuse(old)
810 default:
811 copy(re.Sub, re.Sub[1:])
812 re.Sub = re.Sub[:len(re.Sub)-1]
813 }
814 }
815 return re
816 }
817
818 if re.Op == OpLiteral {
819 re.Rune = re.Rune[:copy(re.Rune, re.Rune[n:])]
820 if len(re.Rune) == 0 {
821 re.Op = OpEmptyMatch
822 }
823 }
824 return re
825 }
826
827
828
829 func (p *parser) leadingRegexp(re *Regexp) *Regexp {
830 if re.Op == OpEmptyMatch {
831 return nil
832 }
833 if re.Op == OpConcat && len(re.Sub) > 0 {
834 sub := re.Sub[0]
835 if sub.Op == OpEmptyMatch {
836 return nil
837 }
838 return sub
839 }
840 return re
841 }
842
843
844
845
846 func (p *parser) removeLeadingRegexp(re *Regexp, reuse bool) *Regexp {
847 if re.Op == OpConcat && len(re.Sub) > 0 {
848 if reuse {
849 p.reuse(re.Sub[0])
850 }
851 re.Sub = re.Sub[:copy(re.Sub, re.Sub[1:])]
852 switch len(re.Sub) {
853 case 0:
854 re.Op = OpEmptyMatch
855 re.Sub = nil
856 case 1:
857 old := re
858 re = re.Sub[0]
859 p.reuse(old)
860 }
861 return re
862 }
863 if reuse {
864 p.reuse(re)
865 }
866 return p.newRegexp(OpEmptyMatch)
867 }
868
869 func literalRegexp(s string, flags Flags) *Regexp {
870 re := &Regexp{Op: OpLiteral}
871 re.Flags = flags
872 re.Rune = re.Rune0[:0]
873 for _, c := range s {
874 if len(re.Rune) >= cap(re.Rune) {
875
876 re.Rune = []rune(s)
877 break
878 }
879 re.Rune = append(re.Rune, c)
880 }
881 return re
882 }
883
884
885
886
887
888
889 func Parse(s string, flags Flags) (*Regexp, error) {
890 return parse(s, flags)
891 }
892
893 func parse(s string, flags Flags) (_ *Regexp, err error) {
894 defer func() {
895 switch r := recover(); r {
896 default:
897 panic(r)
898 case nil:
899
900 case ErrInternalError:
901 err = &Error{Code: ErrInternalError, Expr: s}
902 case ErrNestingDepth:
903 err = &Error{Code: ErrNestingDepth, Expr: s}
904 }
905 }()
906
907 if flags&Literal != 0 {
908
909 if err := checkUTF8(s); err != nil {
910 return nil, err
911 }
912 return literalRegexp(s, flags), nil
913 }
914
915
916 var (
917 p parser
918 c rune
919 op Op
920 lastRepeat string
921 )
922 p.flags = flags
923 p.wholeRegexp = s
924 t := s
925 for t != "" {
926 repeat := ""
927 BigSwitch:
928 switch t[0] {
929 default:
930 if c, t, err = nextRune(t); err != nil {
931 return nil, err
932 }
933 p.literal(c)
934
935 case '(':
936 if p.flags&PerlX != 0 && len(t) >= 2 && t[1] == '?' {
937
938 if t, err = p.parsePerlFlags(t); err != nil {
939 return nil, err
940 }
941 break
942 }
943 p.numCap++
944 p.op(opLeftParen).Cap = p.numCap
945 t = t[1:]
946 case '|':
947 if err = p.parseVerticalBar(); err != nil {
948 return nil, err
949 }
950 t = t[1:]
951 case ')':
952 if err = p.parseRightParen(); err != nil {
953 return nil, err
954 }
955 t = t[1:]
956 case '^':
957 if p.flags&OneLine != 0 {
958 p.op(OpBeginText)
959 } else {
960 p.op(OpBeginLine)
961 }
962 t = t[1:]
963 case '$':
964 if p.flags&OneLine != 0 {
965 p.op(OpEndText).Flags |= WasDollar
966 } else {
967 p.op(OpEndLine)
968 }
969 t = t[1:]
970 case '.':
971 if p.flags&DotNL != 0 {
972 p.op(OpAnyChar)
973 } else {
974 p.op(OpAnyCharNotNL)
975 }
976 t = t[1:]
977 case '[':
978 if t, err = p.parseClass(t); err != nil {
979 return nil, err
980 }
981 case '*', '+', '?':
982 before := t
983 switch t[0] {
984 case '*':
985 op = OpStar
986 case '+':
987 op = OpPlus
988 case '?':
989 op = OpQuest
990 }
991 after := t[1:]
992 if after, err = p.repeat(op, 0, 0, before, after, lastRepeat); err != nil {
993 return nil, err
994 }
995 repeat = before
996 t = after
997 case '{':
998 op = OpRepeat
999 before := t
1000 min, max, after, ok := p.parseRepeat(t)
1001 if !ok {
1002
1003 p.literal('{')
1004 t = t[1:]
1005 break
1006 }
1007 if min < 0 || min > 1000 || max > 1000 || max >= 0 && min > max {
1008
1009 return nil, &Error{ErrInvalidRepeatSize, before[:len(before)-len(after)]}
1010 }
1011 if after, err = p.repeat(op, min, max, before, after, lastRepeat); err != nil {
1012 return nil, err
1013 }
1014 repeat = before
1015 t = after
1016 case '\\':
1017 if p.flags&PerlX != 0 && len(t) >= 2 {
1018 switch t[1] {
1019 case 'A':
1020 p.op(OpBeginText)
1021 t = t[2:]
1022 break BigSwitch
1023 case 'b':
1024 p.op(OpWordBoundary)
1025 t = t[2:]
1026 break BigSwitch
1027 case 'B':
1028 p.op(OpNoWordBoundary)
1029 t = t[2:]
1030 break BigSwitch
1031 case 'C':
1032
1033 return nil, &Error{ErrInvalidEscape, t[:2]}
1034 case 'Q':
1035
1036 var lit string
1037 lit, t, _ = strings.Cut(t[2:], `\E`)
1038 for lit != "" {
1039 c, rest, err := nextRune(lit)
1040 if err != nil {
1041 return nil, err
1042 }
1043 p.literal(c)
1044 lit = rest
1045 }
1046 break BigSwitch
1047 case 'z':
1048 p.op(OpEndText)
1049 t = t[2:]
1050 break BigSwitch
1051 }
1052 }
1053
1054 re := p.newRegexp(OpCharClass)
1055 re.Flags = p.flags
1056
1057
1058 if len(t) >= 2 && (t[1] == 'p' || t[1] == 'P') {
1059 r, rest, err := p.parseUnicodeClass(t, re.Rune0[:0])
1060 if err != nil {
1061 return nil, err
1062 }
1063 if r != nil {
1064 re.Rune = r
1065 t = rest
1066 p.push(re)
1067 break BigSwitch
1068 }
1069 }
1070
1071
1072 if r, rest := p.parsePerlClassEscape(t, re.Rune0[:0]); r != nil {
1073 re.Rune = r
1074 t = rest
1075 p.push(re)
1076 break BigSwitch
1077 }
1078 p.reuse(re)
1079
1080
1081 if c, t, err = p.parseEscape(t); err != nil {
1082 return nil, err
1083 }
1084 p.literal(c)
1085 }
1086 lastRepeat = repeat
1087 }
1088
1089 p.concat()
1090 if p.swapVerticalBar() {
1091
1092 p.stack = p.stack[:len(p.stack)-1]
1093 }
1094 p.alternate()
1095
1096 n := len(p.stack)
1097 if n != 1 {
1098 return nil, &Error{ErrMissingParen, s}
1099 }
1100 return p.stack[0], nil
1101 }
1102
1103
1104
1105
1106 func (p *parser) parseRepeat(s string) (min, max int, rest string, ok bool) {
1107 if s == "" || s[0] != '{' {
1108 return
1109 }
1110 s = s[1:]
1111 var ok1 bool
1112 if min, s, ok1 = p.parseInt(s); !ok1 {
1113 return
1114 }
1115 if s == "" {
1116 return
1117 }
1118 if s[0] != ',' {
1119 max = min
1120 } else {
1121 s = s[1:]
1122 if s == "" {
1123 return
1124 }
1125 if s[0] == '}' {
1126 max = -1
1127 } else if max, s, ok1 = p.parseInt(s); !ok1 {
1128 return
1129 } else if max < 0 {
1130
1131 min = -1
1132 }
1133 }
1134 if s == "" || s[0] != '}' {
1135 return
1136 }
1137 rest = s[1:]
1138 ok = true
1139 return
1140 }
1141
1142
1143
1144
1145 func (p *parser) parsePerlFlags(s string) (rest string, err error) {
1146 t := s
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163 if len(t) > 4 && t[2] == 'P' && t[3] == '<' {
1164
1165 end := strings.IndexRune(t, '>')
1166 if end < 0 {
1167 if err = checkUTF8(t); err != nil {
1168 return "", err
1169 }
1170 return "", &Error{ErrInvalidNamedCapture, s}
1171 }
1172
1173 capture := t[:end+1]
1174 name := t[4:end]
1175 if err = checkUTF8(name); err != nil {
1176 return "", err
1177 }
1178 if !isValidCaptureName(name) {
1179 return "", &Error{ErrInvalidNamedCapture, capture}
1180 }
1181
1182
1183 p.numCap++
1184 re := p.op(opLeftParen)
1185 re.Cap = p.numCap
1186 re.Name = name
1187 return t[end+1:], nil
1188 }
1189
1190
1191 var c rune
1192 t = t[2:]
1193 flags := p.flags
1194 sign := +1
1195 sawFlag := false
1196 Loop:
1197 for t != "" {
1198 if c, t, err = nextRune(t); err != nil {
1199 return "", err
1200 }
1201 switch c {
1202 default:
1203 break Loop
1204
1205
1206 case 'i':
1207 flags |= FoldCase
1208 sawFlag = true
1209 case 'm':
1210 flags &^= OneLine
1211 sawFlag = true
1212 case 's':
1213 flags |= DotNL
1214 sawFlag = true
1215 case 'U':
1216 flags |= NonGreedy
1217 sawFlag = true
1218
1219
1220 case '-':
1221 if sign < 0 {
1222 break Loop
1223 }
1224 sign = -1
1225
1226
1227 flags = ^flags
1228 sawFlag = false
1229
1230
1231 case ':', ')':
1232 if sign < 0 {
1233 if !sawFlag {
1234 break Loop
1235 }
1236 flags = ^flags
1237 }
1238 if c == ':' {
1239
1240 p.op(opLeftParen)
1241 }
1242 p.flags = flags
1243 return t, nil
1244 }
1245 }
1246
1247 return "", &Error{ErrInvalidPerlOp, s[:len(s)-len(t)]}
1248 }
1249
1250
1251
1252
1253
1254
1255 func isValidCaptureName(name string) bool {
1256 if name == "" {
1257 return false
1258 }
1259 for _, c := range name {
1260 if c != '_' && !isalnum(c) {
1261 return false
1262 }
1263 }
1264 return true
1265 }
1266
1267
1268 func (p *parser) parseInt(s string) (n int, rest string, ok bool) {
1269 if s == "" || s[0] < '0' || '9' < s[0] {
1270 return
1271 }
1272
1273 if len(s) >= 2 && s[0] == '0' && '0' <= s[1] && s[1] <= '9' {
1274 return
1275 }
1276 t := s
1277 for s != "" && '0' <= s[0] && s[0] <= '9' {
1278 s = s[1:]
1279 }
1280 rest = s
1281 ok = true
1282
1283 t = t[:len(t)-len(s)]
1284 for i := 0; i < len(t); i++ {
1285
1286 if n >= 1e8 {
1287 n = -1
1288 break
1289 }
1290 n = n*10 + int(t[i]) - '0'
1291 }
1292 return
1293 }
1294
1295
1296
1297 func isCharClass(re *Regexp) bool {
1298 return re.Op == OpLiteral && len(re.Rune) == 1 ||
1299 re.Op == OpCharClass ||
1300 re.Op == OpAnyCharNotNL ||
1301 re.Op == OpAnyChar
1302 }
1303
1304
1305 func matchRune(re *Regexp, r rune) bool {
1306 switch re.Op {
1307 case OpLiteral:
1308 return len(re.Rune) == 1 && re.Rune[0] == r
1309 case OpCharClass:
1310 for i := 0; i < len(re.Rune); i += 2 {
1311 if re.Rune[i] <= r && r <= re.Rune[i+1] {
1312 return true
1313 }
1314 }
1315 return false
1316 case OpAnyCharNotNL:
1317 return r != '\n'
1318 case OpAnyChar:
1319 return true
1320 }
1321 return false
1322 }
1323
1324
1325 func (p *parser) parseVerticalBar() error {
1326 p.concat()
1327
1328
1329
1330
1331
1332 if !p.swapVerticalBar() {
1333 p.op(opVerticalBar)
1334 }
1335
1336 return nil
1337 }
1338
1339
1340
1341
1342 func mergeCharClass(dst, src *Regexp) {
1343 switch dst.Op {
1344 case OpAnyChar:
1345
1346 case OpAnyCharNotNL:
1347
1348 if matchRune(src, '\n') {
1349 dst.Op = OpAnyChar
1350 }
1351 case OpCharClass:
1352
1353 if src.Op == OpLiteral {
1354 dst.Rune = appendLiteral(dst.Rune, src.Rune[0], src.Flags)
1355 } else {
1356 dst.Rune = appendClass(dst.Rune, src.Rune)
1357 }
1358 case OpLiteral:
1359
1360 if src.Rune[0] == dst.Rune[0] && src.Flags == dst.Flags {
1361 break
1362 }
1363 dst.Op = OpCharClass
1364 dst.Rune = appendLiteral(dst.Rune[:0], dst.Rune[0], dst.Flags)
1365 dst.Rune = appendLiteral(dst.Rune, src.Rune[0], src.Flags)
1366 }
1367 }
1368
1369
1370
1371
1372 func (p *parser) swapVerticalBar() bool {
1373
1374
1375 n := len(p.stack)
1376 if n >= 3 && p.stack[n-2].Op == opVerticalBar && isCharClass(p.stack[n-1]) && isCharClass(p.stack[n-3]) {
1377 re1 := p.stack[n-1]
1378 re3 := p.stack[n-3]
1379
1380 if re1.Op > re3.Op {
1381 re1, re3 = re3, re1
1382 p.stack[n-3] = re3
1383 }
1384 mergeCharClass(re3, re1)
1385 p.reuse(re1)
1386 p.stack = p.stack[:n-1]
1387 return true
1388 }
1389
1390 if n >= 2 {
1391 re1 := p.stack[n-1]
1392 re2 := p.stack[n-2]
1393 if re2.Op == opVerticalBar {
1394 if n >= 3 {
1395
1396
1397 cleanAlt(p.stack[n-3])
1398 }
1399 p.stack[n-2] = re1
1400 p.stack[n-1] = re2
1401 return true
1402 }
1403 }
1404 return false
1405 }
1406
1407
1408 func (p *parser) parseRightParen() error {
1409 p.concat()
1410 if p.swapVerticalBar() {
1411
1412 p.stack = p.stack[:len(p.stack)-1]
1413 }
1414 p.alternate()
1415
1416 n := len(p.stack)
1417 if n < 2 {
1418 return &Error{ErrUnexpectedParen, p.wholeRegexp}
1419 }
1420 re1 := p.stack[n-1]
1421 re2 := p.stack[n-2]
1422 p.stack = p.stack[:n-2]
1423 if re2.Op != opLeftParen {
1424 return &Error{ErrUnexpectedParen, p.wholeRegexp}
1425 }
1426
1427 p.flags = re2.Flags
1428 if re2.Cap == 0 {
1429
1430 p.push(re1)
1431 } else {
1432 re2.Op = OpCapture
1433 re2.Sub = re2.Sub0[:1]
1434 re2.Sub[0] = re1
1435 p.push(re2)
1436 }
1437 return nil
1438 }
1439
1440
1441
1442 func (p *parser) parseEscape(s string) (r rune, rest string, err error) {
1443 t := s[1:]
1444 if t == "" {
1445 return 0, "", &Error{ErrTrailingBackslash, ""}
1446 }
1447 c, t, err := nextRune(t)
1448 if err != nil {
1449 return 0, "", err
1450 }
1451
1452 Switch:
1453 switch c {
1454 default:
1455 if c < utf8.RuneSelf && !isalnum(c) {
1456
1457
1458
1459
1460 return c, t, nil
1461 }
1462
1463
1464 case '1', '2', '3', '4', '5', '6', '7':
1465
1466 if t == "" || t[0] < '0' || t[0] > '7' {
1467 break
1468 }
1469 fallthrough
1470 case '0':
1471
1472 r = c - '0'
1473 for i := 1; i < 3; i++ {
1474 if t == "" || t[0] < '0' || t[0] > '7' {
1475 break
1476 }
1477 r = r*8 + rune(t[0]) - '0'
1478 t = t[1:]
1479 }
1480 return r, t, nil
1481
1482
1483 case 'x':
1484 if t == "" {
1485 break
1486 }
1487 if c, t, err = nextRune(t); err != nil {
1488 return 0, "", err
1489 }
1490 if c == '{' {
1491
1492
1493
1494
1495 nhex := 0
1496 r = 0
1497 for {
1498 if t == "" {
1499 break Switch
1500 }
1501 if c, t, err = nextRune(t); err != nil {
1502 return 0, "", err
1503 }
1504 if c == '}' {
1505 break
1506 }
1507 v := unhex(c)
1508 if v < 0 {
1509 break Switch
1510 }
1511 r = r*16 + v
1512 if r > unicode.MaxRune {
1513 break Switch
1514 }
1515 nhex++
1516 }
1517 if nhex == 0 {
1518 break Switch
1519 }
1520 return r, t, nil
1521 }
1522
1523
1524 x := unhex(c)
1525 if c, t, err = nextRune(t); err != nil {
1526 return 0, "", err
1527 }
1528 y := unhex(c)
1529 if x < 0 || y < 0 {
1530 break
1531 }
1532 return x*16 + y, t, nil
1533
1534
1535
1536
1537
1538
1539
1540 case 'a':
1541 return '\a', t, err
1542 case 'f':
1543 return '\f', t, err
1544 case 'n':
1545 return '\n', t, err
1546 case 'r':
1547 return '\r', t, err
1548 case 't':
1549 return '\t', t, err
1550 case 'v':
1551 return '\v', t, err
1552 }
1553 return 0, "", &Error{ErrInvalidEscape, s[:len(s)-len(t)]}
1554 }
1555
1556
1557
1558 func (p *parser) parseClassChar(s, wholeClass string) (r rune, rest string, err error) {
1559 if s == "" {
1560 return 0, "", &Error{Code: ErrMissingBracket, Expr: wholeClass}
1561 }
1562
1563
1564
1565 if s[0] == '\\' {
1566 return p.parseEscape(s)
1567 }
1568
1569 return nextRune(s)
1570 }
1571
1572 type charGroup struct {
1573 sign int
1574 class []rune
1575 }
1576
1577
1578
1579
1580 func (p *parser) parsePerlClassEscape(s string, r []rune) (out []rune, rest string) {
1581 if p.flags&PerlX == 0 || len(s) < 2 || s[0] != '\\' {
1582 return
1583 }
1584 g := perlGroup[s[0:2]]
1585 if g.sign == 0 {
1586 return
1587 }
1588 return p.appendGroup(r, g), s[2:]
1589 }
1590
1591
1592
1593
1594 func (p *parser) parseNamedClass(s string, r []rune) (out []rune, rest string, err error) {
1595 if len(s) < 2 || s[0] != '[' || s[1] != ':' {
1596 return
1597 }
1598
1599 i := strings.Index(s[2:], ":]")
1600 if i < 0 {
1601 return
1602 }
1603 i += 2
1604 name, s := s[0:i+2], s[i+2:]
1605 g := posixGroup[name]
1606 if g.sign == 0 {
1607 return nil, "", &Error{ErrInvalidCharRange, name}
1608 }
1609 return p.appendGroup(r, g), s, nil
1610 }
1611
1612 func (p *parser) appendGroup(r []rune, g charGroup) []rune {
1613 if p.flags&FoldCase == 0 {
1614 if g.sign < 0 {
1615 r = appendNegatedClass(r, g.class)
1616 } else {
1617 r = appendClass(r, g.class)
1618 }
1619 } else {
1620 tmp := p.tmpClass[:0]
1621 tmp = appendFoldedClass(tmp, g.class)
1622 p.tmpClass = tmp
1623 tmp = cleanClass(&p.tmpClass)
1624 if g.sign < 0 {
1625 r = appendNegatedClass(r, tmp)
1626 } else {
1627 r = appendClass(r, tmp)
1628 }
1629 }
1630 return r
1631 }
1632
1633 var anyTable = &unicode.RangeTable{
1634 R16: []unicode.Range16{{Lo: 0, Hi: 1<<16 - 1, Stride: 1}},
1635 R32: []unicode.Range32{{Lo: 1 << 16, Hi: unicode.MaxRune, Stride: 1}},
1636 }
1637
1638
1639
1640 func unicodeTable(name string) (*unicode.RangeTable, *unicode.RangeTable) {
1641
1642 if name == "Any" {
1643 return anyTable, anyTable
1644 }
1645 if t := unicode.Categories[name]; t != nil {
1646 return t, unicode.FoldCategory[name]
1647 }
1648 if t := unicode.Scripts[name]; t != nil {
1649 return t, unicode.FoldScript[name]
1650 }
1651 return nil, nil
1652 }
1653
1654
1655
1656
1657 func (p *parser) parseUnicodeClass(s string, r []rune) (out []rune, rest string, err error) {
1658 if p.flags&UnicodeGroups == 0 || len(s) < 2 || s[0] != '\\' || s[1] != 'p' && s[1] != 'P' {
1659 return
1660 }
1661
1662
1663 sign := +1
1664 if s[1] == 'P' {
1665 sign = -1
1666 }
1667 t := s[2:]
1668 c, t, err := nextRune(t)
1669 if err != nil {
1670 return
1671 }
1672 var seq, name string
1673 if c != '{' {
1674
1675 seq = s[:len(s)-len(t)]
1676 name = seq[2:]
1677 } else {
1678
1679 end := strings.IndexRune(s, '}')
1680 if end < 0 {
1681 if err = checkUTF8(s); err != nil {
1682 return
1683 }
1684 return nil, "", &Error{ErrInvalidCharRange, s}
1685 }
1686 seq, t = s[:end+1], s[end+1:]
1687 name = s[3:end]
1688 if err = checkUTF8(name); err != nil {
1689 return
1690 }
1691 }
1692
1693
1694 if name != "" && name[0] == '^' {
1695 sign = -sign
1696 name = name[1:]
1697 }
1698
1699 tab, fold := unicodeTable(name)
1700 if tab == nil {
1701 return nil, "", &Error{ErrInvalidCharRange, seq}
1702 }
1703
1704 if p.flags&FoldCase == 0 || fold == nil {
1705 if sign > 0 {
1706 r = appendTable(r, tab)
1707 } else {
1708 r = appendNegatedTable(r, tab)
1709 }
1710 } else {
1711
1712
1713
1714 tmp := p.tmpClass[:0]
1715 tmp = appendTable(tmp, tab)
1716 tmp = appendTable(tmp, fold)
1717 p.tmpClass = tmp
1718 tmp = cleanClass(&p.tmpClass)
1719 if sign > 0 {
1720 r = appendClass(r, tmp)
1721 } else {
1722 r = appendNegatedClass(r, tmp)
1723 }
1724 }
1725 return r, t, nil
1726 }
1727
1728
1729
1730 func (p *parser) parseClass(s string) (rest string, err error) {
1731 t := s[1:]
1732 re := p.newRegexp(OpCharClass)
1733 re.Flags = p.flags
1734 re.Rune = re.Rune0[:0]
1735
1736 sign := +1
1737 if t != "" && t[0] == '^' {
1738 sign = -1
1739 t = t[1:]
1740
1741
1742
1743 if p.flags&ClassNL == 0 {
1744 re.Rune = append(re.Rune, '\n', '\n')
1745 }
1746 }
1747
1748 class := re.Rune
1749 first := true
1750 for t == "" || t[0] != ']' || first {
1751
1752
1753 if t != "" && t[0] == '-' && p.flags&PerlX == 0 && !first && (len(t) == 1 || t[1] != ']') {
1754 _, size := utf8.DecodeRuneInString(t[1:])
1755 return "", &Error{Code: ErrInvalidCharRange, Expr: t[:1+size]}
1756 }
1757 first = false
1758
1759
1760 if len(t) > 2 && t[0] == '[' && t[1] == ':' {
1761 nclass, nt, err := p.parseNamedClass(t, class)
1762 if err != nil {
1763 return "", err
1764 }
1765 if nclass != nil {
1766 class, t = nclass, nt
1767 continue
1768 }
1769 }
1770
1771
1772 nclass, nt, err := p.parseUnicodeClass(t, class)
1773 if err != nil {
1774 return "", err
1775 }
1776 if nclass != nil {
1777 class, t = nclass, nt
1778 continue
1779 }
1780
1781
1782 if nclass, nt := p.parsePerlClassEscape(t, class); nclass != nil {
1783 class, t = nclass, nt
1784 continue
1785 }
1786
1787
1788 rng := t
1789 var lo, hi rune
1790 if lo, t, err = p.parseClassChar(t, s); err != nil {
1791 return "", err
1792 }
1793 hi = lo
1794
1795 if len(t) >= 2 && t[0] == '-' && t[1] != ']' {
1796 t = t[1:]
1797 if hi, t, err = p.parseClassChar(t, s); err != nil {
1798 return "", err
1799 }
1800 if hi < lo {
1801 rng = rng[:len(rng)-len(t)]
1802 return "", &Error{Code: ErrInvalidCharRange, Expr: rng}
1803 }
1804 }
1805 if p.flags&FoldCase == 0 {
1806 class = appendRange(class, lo, hi)
1807 } else {
1808 class = appendFoldedRange(class, lo, hi)
1809 }
1810 }
1811 t = t[1:]
1812
1813
1814 re.Rune = class
1815 class = cleanClass(&re.Rune)
1816 if sign < 0 {
1817 class = negateClass(class)
1818 }
1819 re.Rune = class
1820 p.push(re)
1821 return t, nil
1822 }
1823
1824
1825
1826 func cleanClass(rp *[]rune) []rune {
1827
1828
1829 sort.Sort(ranges{rp})
1830
1831 r := *rp
1832 if len(r) < 2 {
1833 return r
1834 }
1835
1836
1837 w := 2
1838 for i := 2; i < len(r); i += 2 {
1839 lo, hi := r[i], r[i+1]
1840 if lo <= r[w-1]+1 {
1841
1842 if hi > r[w-1] {
1843 r[w-1] = hi
1844 }
1845 continue
1846 }
1847
1848 r[w] = lo
1849 r[w+1] = hi
1850 w += 2
1851 }
1852
1853 return r[:w]
1854 }
1855
1856
1857 func appendLiteral(r []rune, x rune, flags Flags) []rune {
1858 if flags&FoldCase != 0 {
1859 return appendFoldedRange(r, x, x)
1860 }
1861 return appendRange(r, x, x)
1862 }
1863
1864
1865 func appendRange(r []rune, lo, hi rune) []rune {
1866
1867
1868
1869
1870 n := len(r)
1871 for i := 2; i <= 4; i += 2 {
1872 if n >= i {
1873 rlo, rhi := r[n-i], r[n-i+1]
1874 if lo <= rhi+1 && rlo <= hi+1 {
1875 if lo < rlo {
1876 r[n-i] = lo
1877 }
1878 if hi > rhi {
1879 r[n-i+1] = hi
1880 }
1881 return r
1882 }
1883 }
1884 }
1885
1886 return append(r, lo, hi)
1887 }
1888
1889 const (
1890
1891
1892 minFold = 0x0041
1893 maxFold = 0x1e943
1894 )
1895
1896
1897
1898 func appendFoldedRange(r []rune, lo, hi rune) []rune {
1899
1900 if lo <= minFold && hi >= maxFold {
1901
1902 return appendRange(r, lo, hi)
1903 }
1904 if hi < minFold || lo > maxFold {
1905
1906 return appendRange(r, lo, hi)
1907 }
1908 if lo < minFold {
1909
1910 r = appendRange(r, lo, minFold-1)
1911 lo = minFold
1912 }
1913 if hi > maxFold {
1914
1915 r = appendRange(r, maxFold+1, hi)
1916 hi = maxFold
1917 }
1918
1919
1920 for c := lo; c <= hi; c++ {
1921 r = appendRange(r, c, c)
1922 f := unicode.SimpleFold(c)
1923 for f != c {
1924 r = appendRange(r, f, f)
1925 f = unicode.SimpleFold(f)
1926 }
1927 }
1928 return r
1929 }
1930
1931
1932
1933 func appendClass(r []rune, x []rune) []rune {
1934 for i := 0; i < len(x); i += 2 {
1935 r = appendRange(r, x[i], x[i+1])
1936 }
1937 return r
1938 }
1939
1940
1941 func appendFoldedClass(r []rune, x []rune) []rune {
1942 for i := 0; i < len(x); i += 2 {
1943 r = appendFoldedRange(r, x[i], x[i+1])
1944 }
1945 return r
1946 }
1947
1948
1949
1950 func appendNegatedClass(r []rune, x []rune) []rune {
1951 nextLo := '\u0000'
1952 for i := 0; i < len(x); i += 2 {
1953 lo, hi := x[i], x[i+1]
1954 if nextLo <= lo-1 {
1955 r = appendRange(r, nextLo, lo-1)
1956 }
1957 nextLo = hi + 1
1958 }
1959 if nextLo <= unicode.MaxRune {
1960 r = appendRange(r, nextLo, unicode.MaxRune)
1961 }
1962 return r
1963 }
1964
1965
1966 func appendTable(r []rune, x *unicode.RangeTable) []rune {
1967 for _, xr := range x.R16 {
1968 lo, hi, stride := rune(xr.Lo), rune(xr.Hi), rune(xr.Stride)
1969 if stride == 1 {
1970 r = appendRange(r, lo, hi)
1971 continue
1972 }
1973 for c := lo; c <= hi; c += stride {
1974 r = appendRange(r, c, c)
1975 }
1976 }
1977 for _, xr := range x.R32 {
1978 lo, hi, stride := rune(xr.Lo), rune(xr.Hi), rune(xr.Stride)
1979 if stride == 1 {
1980 r = appendRange(r, lo, hi)
1981 continue
1982 }
1983 for c := lo; c <= hi; c += stride {
1984 r = appendRange(r, c, c)
1985 }
1986 }
1987 return r
1988 }
1989
1990
1991 func appendNegatedTable(r []rune, x *unicode.RangeTable) []rune {
1992 nextLo := '\u0000'
1993 for _, xr := range x.R16 {
1994 lo, hi, stride := rune(xr.Lo), rune(xr.Hi), rune(xr.Stride)
1995 if stride == 1 {
1996 if nextLo <= lo-1 {
1997 r = appendRange(r, nextLo, lo-1)
1998 }
1999 nextLo = hi + 1
2000 continue
2001 }
2002 for c := lo; c <= hi; c += stride {
2003 if nextLo <= c-1 {
2004 r = appendRange(r, nextLo, c-1)
2005 }
2006 nextLo = c + 1
2007 }
2008 }
2009 for _, xr := range x.R32 {
2010 lo, hi, stride := rune(xr.Lo), rune(xr.Hi), rune(xr.Stride)
2011 if stride == 1 {
2012 if nextLo <= lo-1 {
2013 r = appendRange(r, nextLo, lo-1)
2014 }
2015 nextLo = hi + 1
2016 continue
2017 }
2018 for c := lo; c <= hi; c += stride {
2019 if nextLo <= c-1 {
2020 r = appendRange(r, nextLo, c-1)
2021 }
2022 nextLo = c + 1
2023 }
2024 }
2025 if nextLo <= unicode.MaxRune {
2026 r = appendRange(r, nextLo, unicode.MaxRune)
2027 }
2028 return r
2029 }
2030
2031
2032
2033 func negateClass(r []rune) []rune {
2034 nextLo := '\u0000'
2035 w := 0
2036 for i := 0; i < len(r); i += 2 {
2037 lo, hi := r[i], r[i+1]
2038 if nextLo <= lo-1 {
2039 r[w] = nextLo
2040 r[w+1] = lo - 1
2041 w += 2
2042 }
2043 nextLo = hi + 1
2044 }
2045 r = r[:w]
2046 if nextLo <= unicode.MaxRune {
2047
2048
2049 r = append(r, nextLo, unicode.MaxRune)
2050 }
2051 return r
2052 }
2053
2054
2055
2056
2057
2058 type ranges struct {
2059 p *[]rune
2060 }
2061
2062 func (ra ranges) Less(i, j int) bool {
2063 p := *ra.p
2064 i *= 2
2065 j *= 2
2066 return p[i] < p[j] || p[i] == p[j] && p[i+1] > p[j+1]
2067 }
2068
2069 func (ra ranges) Len() int {
2070 return len(*ra.p) / 2
2071 }
2072
2073 func (ra ranges) Swap(i, j int) {
2074 p := *ra.p
2075 i *= 2
2076 j *= 2
2077 p[i], p[i+1], p[j], p[j+1] = p[j], p[j+1], p[i], p[i+1]
2078 }
2079
2080 func checkUTF8(s string) error {
2081 for s != "" {
2082 rune, size := utf8.DecodeRuneInString(s)
2083 if rune == utf8.RuneError && size == 1 {
2084 return &Error{Code: ErrInvalidUTF8, Expr: s}
2085 }
2086 s = s[size:]
2087 }
2088 return nil
2089 }
2090
2091 func nextRune(s string) (c rune, t string, err error) {
2092 c, size := utf8.DecodeRuneInString(s)
2093 if c == utf8.RuneError && size == 1 {
2094 return 0, "", &Error{Code: ErrInvalidUTF8, Expr: s}
2095 }
2096 return c, s[size:], nil
2097 }
2098
2099 func isalnum(c rune) bool {
2100 return '0' <= c && c <= '9' || 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z'
2101 }
2102
2103 func unhex(c rune) rune {
2104 if '0' <= c && c <= '9' {
2105 return c - '0'
2106 }
2107 if 'a' <= c && c <= 'f' {
2108 return c - 'a' + 10
2109 }
2110 if 'A' <= c && c <= 'F' {
2111 return c - 'A' + 10
2112 }
2113 return -1
2114 }
2115
View as plain text