1 package text
2
3 import (
4 "io"
5 "regexp"
6 "unicode/utf8"
7
8 "github.com/yuin/goldmark/util"
9 )
10
// invalidValue is the sentinel offset passed to blockReader.SetPosition (via
// NewSegment(invalidValue, invalidValue)) to mean "no explicit segment: use
// the segment that belongs to the given line".
const invalidValue = -1
12
13
// EOF is the byte value returned by Peek when the current position is not
// readable (for example, past the end of the source).
const EOF = byte(0xff)
15
16
// A Reader provides line-oriented, position-aware access to a byte source.
// It also satisfies io.RuneReader so it can be fed to the regexp package.
type Reader interface {
	io.RuneReader

	// Source returns the underlying source bytes.
	Source() []byte

	// ResetPosition resets the position state of the reader.
	ResetPosition()

	// Peek returns the byte at the current position without advancing.
	// It returns EOF when the current position is not readable.
	Peek() byte

	// PeekLine returns the remainder of the current line and its Segment
	// without advancing. The byte slice is nil when nothing is readable.
	PeekLine() ([]byte, Segment)

	// PrecendingCharacter returns the rune located just before the current
	// position.
	PrecendingCharacter() rune

	// Value returns the bytes designated by the given segment.
	Value(Segment) []byte

	// LineOffset returns the column of the current position within the
	// current line, with tabs expanded using util.TabWidth.
	LineOffset() int

	// Position returns the current line number and the current Segment.
	Position() (int, Segment)

	// SetPosition sets the current line number and the current Segment.
	SetPosition(int, Segment)

	// SetPadding sets the padding of the current Segment.
	SetPadding(int)

	// Advance moves the current position forward by the given number of
	// bytes; pending padding is consumed first.
	Advance(int)

	// AdvanceAndSetPadding advances by the first argument and then sets the
	// padding to the second argument if it exceeds the padding left after
	// advancing.
	AdvanceAndSetPadding(int, int)

	// AdvanceLine moves the current position to the start of the next line.
	AdvanceLine()

	// SkipSpaces advances over space characters. It returns a Segment, the
	// number of characters skipped, and false if the end of input was
	// reached before a non-space character was found.
	SkipSpaces() (Segment, int, bool)

	// SkipBlankLines advances over blank lines. It returns the Segment of
	// the first non-blank line, the number of lines skipped, and false if
	// the end of input was reached before a non-blank line was found.
	SkipBlankLines() (Segment, int, bool)

	// Match runs the regular expression against the reader from the current
	// position. On a match the reader is advanced by the length of the
	// match and true is returned; otherwise the position is left unchanged.
	Match(reg *regexp.Regexp) bool

	// FindSubMatch runs the regular expression against the reader from the
	// current position and returns the matched groups, or nil if there is
	// no match. On a match the reader is advanced by the length of the match.
	FindSubMatch(reg *regexp.Regexp) [][]byte

	// FindClosure searches forward for the closer byte that balances an
	// opener. It returns the segments scanned up to the closer and true when
	// one is found, or nil and false otherwise.
	FindClosure(opener, closer byte, options FindClosureOptions) (*Segments, bool)
}
77
78
// FindClosureOptions configures the behaviour of Reader.FindClosure.
type FindClosureOptions struct {
	// CodeSpan, when true, makes the search track backtick runs: openers and
	// closers that appear inside a backtick-delimited span are ignored.
	CodeSpan bool

	// Nesting, when true, allows nested opener/closer pairs. When false,
	// encountering another opener aborts the search.
	Nesting bool

	// Newline, when true, lets the search continue onto following lines.
	// When false, the search stops at the end of the current line.
	Newline bool

	// Advance, when true, leaves the reader at the position reached by the
	// search. When false, the reader is restored to where the search began.
	Advance bool
}
96
// reader is the Reader implementation that walks a single byte slice line by
// line.
type reader struct {
	source       []byte  // bytes being read
	sourceLength int     // len(source), captured at construction
	line         int     // index of the current line; -1 before the first AdvanceLine
	peekedLine   []byte  // cached PeekLine result; nil when not cached
	pos          Segment // current position: Start..Stop within source, plus padding
	head         int     // offset AdvanceLine recorded as the start of the current line
	lineOffset   int     // cached LineOffset result; negative when not computed
}
106
107
108 func NewReader(source []byte) Reader {
109 r := &reader{
110 source: source,
111 sourceLength: len(source),
112 }
113 r.ResetPosition()
114 return r
115 }
116
117 func (r *reader) FindClosure(opener, closer byte, options FindClosureOptions) (*Segments, bool) {
118 return findClosureReader(r, opener, closer, options)
119 }
120
121 func (r *reader) ResetPosition() {
122 r.line = -1
123 r.head = 0
124 r.lineOffset = -1
125 r.AdvanceLine()
126 }
127
128 func (r *reader) Source() []byte {
129 return r.source
130 }
131
132 func (r *reader) Value(seg Segment) []byte {
133 return seg.Value(r.source)
134 }
135
136 func (r *reader) Peek() byte {
137 if r.pos.Start >= 0 && r.pos.Start < r.sourceLength {
138 if r.pos.Padding != 0 {
139 return space[0]
140 }
141 return r.source[r.pos.Start]
142 }
143 return EOF
144 }
145
146 func (r *reader) PeekLine() ([]byte, Segment) {
147 if r.pos.Start >= 0 && r.pos.Start < r.sourceLength {
148 if r.peekedLine == nil {
149 r.peekedLine = r.pos.Value(r.Source())
150 }
151 return r.peekedLine, r.pos
152 }
153 return nil, r.pos
154 }
155
156
157 func (r *reader) ReadRune() (rune, int, error) {
158 return readRuneReader(r)
159 }
160
161 func (r *reader) LineOffset() int {
162 if r.lineOffset < 0 {
163 v := 0
164 for i := r.head; i < r.pos.Start; i++ {
165 if r.source[i] == '\t' {
166 v += util.TabWidth(v)
167 } else {
168 v++
169 }
170 }
171 r.lineOffset = v - r.pos.Padding
172 }
173 return r.lineOffset
174 }
175
176 func (r *reader) PrecendingCharacter() rune {
177 if r.pos.Start <= 0 {
178 if r.pos.Padding != 0 {
179 return rune(' ')
180 }
181 return rune('\n')
182 }
183 i := r.pos.Start - 1
184 for ; i >= 0; i-- {
185 if utf8.RuneStart(r.source[i]) {
186 break
187 }
188 }
189 rn, _ := utf8.DecodeRune(r.source[i:])
190 return rn
191 }
192
193 func (r *reader) Advance(n int) {
194 r.lineOffset = -1
195 if n < len(r.peekedLine) && r.pos.Padding == 0 {
196 r.pos.Start += n
197 r.peekedLine = nil
198 return
199 }
200 r.peekedLine = nil
201 l := r.sourceLength
202 for ; n > 0 && r.pos.Start < l; n-- {
203 if r.pos.Padding != 0 {
204 r.pos.Padding--
205 continue
206 }
207 if r.source[r.pos.Start] == '\n' {
208 r.AdvanceLine()
209 continue
210 }
211 r.pos.Start++
212 }
213 }
214
215 func (r *reader) AdvanceAndSetPadding(n, padding int) {
216 r.Advance(n)
217 if padding > r.pos.Padding {
218 r.SetPadding(padding)
219 }
220 }
221
222 func (r *reader) AdvanceLine() {
223 r.lineOffset = -1
224 r.peekedLine = nil
225 r.pos.Start = r.pos.Stop
226 r.head = r.pos.Start
227 if r.pos.Start < 0 {
228 return
229 }
230 r.pos.Stop = r.sourceLength
231 for i := r.pos.Start; i < r.sourceLength; i++ {
232 c := r.source[i]
233 if c == '\n' {
234 r.pos.Stop = i + 1
235 break
236 }
237 }
238 r.line++
239 r.pos.Padding = 0
240 }
241
242 func (r *reader) Position() (int, Segment) {
243 return r.line, r.pos
244 }
245
246 func (r *reader) SetPosition(line int, pos Segment) {
247 r.lineOffset = -1
248 r.line = line
249 r.pos = pos
250 }
251
252 func (r *reader) SetPadding(v int) {
253 r.pos.Padding = v
254 }
255
256 func (r *reader) SkipSpaces() (Segment, int, bool) {
257 return skipSpacesReader(r)
258 }
259
260 func (r *reader) SkipBlankLines() (Segment, int, bool) {
261 return skipBlankLinesReader(r)
262 }
263
264 func (r *reader) Match(reg *regexp.Regexp) bool {
265 return matchReader(r, reg)
266 }
267
268 func (r *reader) FindSubMatch(reg *regexp.Regexp) [][]byte {
269 return findSubMatchReader(r, reg)
270 }
271
272
// A BlockReader is a Reader that reads only the lines described by a set of
// segments.
type BlockReader interface {
	Reader

	// Reset replaces the segments being read and resets the position state.
	Reset(segment *Segments)
}
278
// blockReader is the BlockReader implementation. Each entry of segments is
// treated as one line of the block.
type blockReader struct {
	source         []byte    // bytes the segments point into
	segments       *Segments // lines of the block; nil until Reset is called
	segmentsLength int       // segments.Len(), captured by Reset
	line           int       // index into segments of the current line
	pos            Segment   // current position within source
	head           int       // start offset of the current line's segment
	last           int       // Stop of the final segment; 0 when there are none
	lineOffset     int       // cached LineOffset result; negative when not computed
}
289
290
291 func NewBlockReader(source []byte, segments *Segments) BlockReader {
292 r := &blockReader{
293 source: source,
294 }
295 if segments != nil {
296 r.Reset(segments)
297 }
298 return r
299 }
300
301 func (r *blockReader) FindClosure(opener, closer byte, options FindClosureOptions) (*Segments, bool) {
302 return findClosureReader(r, opener, closer, options)
303 }
304
305 func (r *blockReader) ResetPosition() {
306 r.line = -1
307 r.head = 0
308 r.last = 0
309 r.lineOffset = -1
310 r.pos.Start = -1
311 r.pos.Stop = -1
312 r.pos.Padding = 0
313 if r.segmentsLength > 0 {
314 last := r.segments.At(r.segmentsLength - 1)
315 r.last = last.Stop
316 }
317 r.AdvanceLine()
318 }
319
320 func (r *blockReader) Reset(segments *Segments) {
321 r.segments = segments
322 r.segmentsLength = segments.Len()
323 r.ResetPosition()
324 }
325
326 func (r *blockReader) Source() []byte {
327 return r.source
328 }
329
330 func (r *blockReader) Value(seg Segment) []byte {
331 line := r.segmentsLength - 1
332 ret := make([]byte, 0, seg.Stop-seg.Start+1)
333 for ; line >= 0; line-- {
334 if seg.Start >= r.segments.At(line).Start {
335 break
336 }
337 }
338 i := seg.Start
339 for ; line < r.segmentsLength; line++ {
340 s := r.segments.At(line)
341 if i < 0 {
342 i = s.Start
343 }
344 ret = s.ConcatPadding(ret)
345 for ; i < seg.Stop && i < s.Stop; i++ {
346 ret = append(ret, r.source[i])
347 }
348 i = -1
349 if s.Stop > seg.Stop {
350 break
351 }
352 }
353 return ret
354 }
355
356
357 func (r *blockReader) ReadRune() (rune, int, error) {
358 return readRuneReader(r)
359 }
360
361 func (r *blockReader) PrecendingCharacter() rune {
362 if r.pos.Padding != 0 {
363 return rune(' ')
364 }
365 if r.segments.Len() < 1 {
366 return rune('\n')
367 }
368 firstSegment := r.segments.At(0)
369 if r.line == 0 && r.pos.Start <= firstSegment.Start {
370 return rune('\n')
371 }
372 l := len(r.source)
373 i := r.pos.Start - 1
374 for ; i < l && i >= 0; i-- {
375 if utf8.RuneStart(r.source[i]) {
376 break
377 }
378 }
379 if i < 0 || i >= l {
380 return rune('\n')
381 }
382 rn, _ := utf8.DecodeRune(r.source[i:])
383 return rn
384 }
385
386 func (r *blockReader) LineOffset() int {
387 if r.lineOffset < 0 {
388 v := 0
389 for i := r.head; i < r.pos.Start; i++ {
390 if r.source[i] == '\t' {
391 v += util.TabWidth(v)
392 } else {
393 v++
394 }
395 }
396 r.lineOffset = v - r.pos.Padding
397 }
398 return r.lineOffset
399 }
400
401 func (r *blockReader) Peek() byte {
402 if r.line < r.segmentsLength && r.pos.Start >= 0 && r.pos.Start < r.last {
403 if r.pos.Padding != 0 {
404 return space[0]
405 }
406 return r.source[r.pos.Start]
407 }
408 return EOF
409 }
410
411 func (r *blockReader) PeekLine() ([]byte, Segment) {
412 if r.line < r.segmentsLength && r.pos.Start >= 0 && r.pos.Start < r.last {
413 return r.pos.Value(r.source), r.pos
414 }
415 return nil, r.pos
416 }
417
418 func (r *blockReader) Advance(n int) {
419 r.lineOffset = -1
420
421 if n < r.pos.Stop-r.pos.Start && r.pos.Padding == 0 {
422 r.pos.Start += n
423 return
424 }
425
426 for ; n > 0; n-- {
427 if r.pos.Padding != 0 {
428 r.pos.Padding--
429 continue
430 }
431 if r.pos.Start >= r.pos.Stop-1 && r.pos.Stop < r.last {
432 r.AdvanceLine()
433 continue
434 }
435 r.pos.Start++
436 }
437 }
438
439 func (r *blockReader) AdvanceAndSetPadding(n, padding int) {
440 r.Advance(n)
441 if padding > r.pos.Padding {
442 r.SetPadding(padding)
443 }
444 }
445
446 func (r *blockReader) AdvanceLine() {
447 r.SetPosition(r.line+1, NewSegment(invalidValue, invalidValue))
448 r.head = r.pos.Start
449 }
450
451 func (r *blockReader) Position() (int, Segment) {
452 return r.line, r.pos
453 }
454
455 func (r *blockReader) SetPosition(line int, pos Segment) {
456 r.lineOffset = -1
457 r.line = line
458 if pos.Start == invalidValue {
459 if r.line < r.segmentsLength {
460 s := r.segments.At(line)
461 r.head = s.Start
462 r.pos = s
463 }
464 } else {
465 r.pos = pos
466 if r.line < r.segmentsLength {
467 s := r.segments.At(line)
468 r.head = s.Start
469 }
470 }
471 }
472
473 func (r *blockReader) SetPadding(v int) {
474 r.lineOffset = -1
475 r.pos.Padding = v
476 }
477
478 func (r *blockReader) SkipSpaces() (Segment, int, bool) {
479 return skipSpacesReader(r)
480 }
481
482 func (r *blockReader) SkipBlankLines() (Segment, int, bool) {
483 return skipBlankLinesReader(r)
484 }
485
486 func (r *blockReader) Match(reg *regexp.Regexp) bool {
487 return matchReader(r, reg)
488 }
489
490 func (r *blockReader) FindSubMatch(reg *regexp.Regexp) [][]byte {
491 return findSubMatchReader(r, reg)
492 }
493
494 func skipBlankLinesReader(r Reader) (Segment, int, bool) {
495 lines := 0
496 for {
497 line, seg := r.PeekLine()
498 if line == nil {
499 return seg, lines, false
500 }
501 if util.IsBlank(line) {
502 lines++
503 r.AdvanceLine()
504 } else {
505 return seg, lines, true
506 }
507 }
508 }
509
510 func skipSpacesReader(r Reader) (Segment, int, bool) {
511 chars := 0
512 for {
513 line, segment := r.PeekLine()
514 if line == nil {
515 return segment, chars, false
516 }
517 for i, c := range line {
518 if util.IsSpace(c) {
519 chars++
520 r.Advance(1)
521 continue
522 }
523 return segment.WithStart(segment.Start + i + 1), chars, true
524 }
525 }
526 }
527
528 func matchReader(r Reader, reg *regexp.Regexp) bool {
529 oldline, oldseg := r.Position()
530 match := reg.FindReaderSubmatchIndex(r)
531 r.SetPosition(oldline, oldseg)
532 if match == nil {
533 return false
534 }
535 r.Advance(match[1] - match[0])
536 return true
537 }
538
539 func findSubMatchReader(r Reader, reg *regexp.Regexp) [][]byte {
540 oldline, oldseg := r.Position()
541 match := reg.FindReaderSubmatchIndex(r)
542 r.SetPosition(oldline, oldseg)
543 if match == nil {
544 return nil
545 }
546 runes := make([]rune, 0, match[1]-match[0])
547 for i := 0; i < match[1]; {
548 r, size, _ := readRuneReader(r)
549 i += size
550 runes = append(runes, r)
551 }
552 result := [][]byte{}
553 for i := 0; i < len(match); i += 2 {
554 result = append(result, []byte(string(runes[match[i]:match[i+1]])))
555 }
556
557 r.SetPosition(oldline, oldseg)
558 r.Advance(match[1] - match[0])
559 return result
560 }
561
562 func readRuneReader(r Reader) (rune, int, error) {
563 line, _ := r.PeekLine()
564 if line == nil {
565 return 0, 0, io.EOF
566 }
567 rn, size := utf8.DecodeRune(line)
568 if rn == utf8.RuneError {
569 return 0, 0, io.EOF
570 }
571 r.Advance(size)
572 return rn, size, nil
573 }
574
// findClosureReader scans forward from the current position of r for the
// closer byte that brings the nesting depth back to zero. The depth starts at
// one, i.e. the scan behaves as if one opener has already been consumed.
//
// On success it returns the scanned segments and true: every line that was
// passed completely, followed by the part of the final line that precedes the
// closer. The reader is then positioned just after the closer. On failure it
// returns nil and false. Unless opts.Advance is set, the reader is restored
// to the position it had on entry in both cases.
func findClosureReader(r Reader, opener, closer byte, opts FindClosureOptions) (*Segments, bool) {
	opened := 1         // current nesting depth
	codeSpanOpener := 0 // length of the backtick run that opened a code span; 0 when outside one
	closed := false
	orgline, orgpos := r.Position()
	var ret *Segments

	for {
		bs, seg := r.PeekLine()
		if bs == nil {
			// End of input without finding the closer.
			goto end
		}
		i := 0
		for i < len(bs) {
			c := bs[i]
			if opts.CodeSpan && codeSpanOpener != 0 && c == '`' {
				// Inside a code span: count this backtick run; the span ends
				// only if the run has the same length as the opening run.
				codeSpanCloser := 0
				for ; i < len(bs); i++ {
					if bs[i] == '`' {
						codeSpanCloser++
					} else {
						// Step back so the trailing i++ lands on this byte.
						i--
						break
					}
				}
				if codeSpanCloser == codeSpanOpener {
					codeSpanOpener = 0
				}
			} else if codeSpanOpener == 0 && c == '\\' && i < len(bs)-1 && util.IsPunct(bs[i+1]) {
				// Backslash-escaped punctuation: skip both bytes.
				i += 2
				continue
			} else if opts.CodeSpan && codeSpanOpener == 0 && c == '`' {
				// Start of a code span: remember the length of the backtick run.
				for ; i < len(bs); i++ {
					if bs[i] == '`' {
						codeSpanOpener++
					} else {
						// Step back so the trailing i++ lands on this byte.
						i--
						break
					}
				}
			} else if (opts.CodeSpan && codeSpanOpener == 0) || !opts.CodeSpan {
				// Outside any code span (or code spans are not tracked).
				if c == closer {
					opened--
					if opened == 0 {
						if ret == nil {
							ret = NewSegments()
						}
						// Keep the text before the closer, then move past it.
						ret.Append(seg.WithStop(seg.Start + i))
						r.Advance(i + 1)
						closed = true
						goto end
					}
				} else if c == opener {
					if !opts.Nesting {
						// A nested opener is not allowed: give up.
						goto end
					}
					opened++
				}
			}
			i++
		}
		if !opts.Newline {
			// The search is limited to the current line.
			goto end
		}
		// No closer on this line: record the whole line and continue.
		r.AdvanceLine()
		if ret == nil {
			ret = NewSegments()
		}
		ret.Append(seg)
	}
end:
	if !opts.Advance {
		r.SetPosition(orgline, orgpos)
	}
	if closed {
		return ret, true
	}
	return nil, false
}
654
View as plain text