1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38 package godoc
39
40 import (
41 "bufio"
42 "bytes"
43 "encoding/gob"
44 "errors"
45 "fmt"
46 "go/ast"
47 "go/doc"
48 "go/parser"
49 "go/token"
50 "index/suffixarray"
51 "io"
52 "log"
53 "math"
54 "os"
55 pathpkg "path"
56 "path/filepath"
57 "regexp"
58 "runtime"
59 "sort"
60 "strconv"
61 "strings"
62 "sync"
63 "time"
64 "unicode"
65
66 "golang.org/x/tools/godoc/util"
67 "golang.org/x/tools/godoc/vfs"
68 )
69
70
71
72
73
// A comparer reports whether x is ordered strictly before y.
type comparer func(x, y interface{}) bool

// interfaceSlice pairs a slice of arbitrary values with a comparer so that
// the slice can be handed to package sort.
type interfaceSlice struct {
	slice []interface{}
	less  comparer
}

// A RunList is a list of entries that can be sorted according to some
// criterion and then collapsed into runs (see reduce).
type RunList []interface{}

// sort orders the entries of h in place, using less as the ordering.
func (h RunList) sort(less comparer) {
	adapter := interfaceSlice{slice: h, less: less}
	sort.Sort(&adapter)
}

// Len returns the number of elements; part of sort.Interface.
func (p *interfaceSlice) Len() int {
	return len(p.slice)
}

// Less applies the comparer to elements i and j; part of sort.Interface.
func (p *interfaceSlice) Less(i, j int) bool {
	a, b := p.slice[i], p.slice[j]
	return p.less(a, b)
}

// Swap exchanges elements i and j; part of sort.Interface.
func (p *interfaceSlice) Swap(i, j int) {
	s := p.slice
	s[i], s[j] = s[j], s[i]
}
99
100
101
102 func (h RunList) reduce(less comparer, newRun func(h RunList) interface{}) RunList {
103 if len(h) == 0 {
104 return nil
105 }
106
107
108
109 h.sort(less)
110
111
112 var hh RunList
113 i, x := 0, h[0]
114 for j, y := range h {
115 if less(x, y) {
116 hh = append(hh, newRun(h[i:j]))
117 i, x = j, h[j]
118 }
119 }
120
121 if i < len(h) {
122 hh = append(hh, newRun(h[i:]))
123 }
124
125 return hh
126 }
127
128
129
130
131
132 const removeDuplicates = true
133
134
135
136
137 type KindRun []SpotInfo
138
139
140
141 func (k KindRun) Len() int { return len(k) }
142 func (k KindRun) Less(i, j int) bool { return k[i].Lori() < k[j].Lori() }
143 func (k KindRun) Swap(i, j int) { k[i], k[j] = k[j], k[i] }
144
145
146 func lessKind(x, y interface{}) bool { return x.(SpotInfo).Kind() < y.(SpotInfo).Kind() }
147
148
149 func newKindRun(h RunList) interface{} {
150 run := make(KindRun, len(h))
151 for i, x := range h {
152 run[i] = x.(SpotInfo)
153 }
154
155
156
157 sort.Sort(run)
158
159 if removeDuplicates {
160
161
162
163
164 k := 0
165 prev := SpotInfo(math.MaxUint32)
166 for _, x := range run {
167 if x != prev {
168 run[k] = x
169 k++
170 prev = x
171 }
172 }
173 run = run[0:k]
174 }
175
176 return run
177 }
178
179
180
181
182
// A Pak describes a Go package.
type Pak struct {
	Path string // path of the directory containing the package
	Name string // package name as declared by the package clause
}

// less reports whether p is ordered before q: primarily by package name,
// with the path breaking ties between equal names.
func (p *Pak) less(q *Pak) bool {
	if p.Name != q.Name {
		return p.Name < q.Name
	}
	return p.Path < q.Path
}
192
193
194 type File struct {
195 Name string
196 Pak *Pak
197 }
198
199
200 func (f *File) Path() string {
201 return pathpkg.Join(f.Pak.Path, f.Name)
202 }
203
204
205 type Spot struct {
206 File *File
207 Info SpotInfo
208 }
209
210
211 type FileRun struct {
212 File *File
213 Groups []KindRun
214 }
215
216
217 func lessSpot(x, y interface{}) bool {
218 fx := x.(Spot).File
219 fy := y.(Spot).File
220
221 px := fx.Pak.Path
222 py := fy.Pak.Path
223 return px < py || px == py && fx.Name < fy.Name
224 }
225
226
227 func newFileRun(h RunList) interface{} {
228 file := h[0].(Spot).File
229
230
231 h1 := make(RunList, len(h))
232 for i, x := range h {
233 h1[i] = x.(Spot).Info
234 }
235 h2 := h1.reduce(lessKind, newKindRun)
236
237
238 groups := make([]KindRun, len(h2))
239 for i, x := range h2 {
240 groups[i] = x.(KindRun)
241 }
242 return &FileRun{file, groups}
243 }
244
245
246
247
248
249 type PakRun struct {
250 Pak *Pak
251 Files []*FileRun
252 }
253
254
255 func (p *PakRun) Len() int { return len(p.Files) }
256 func (p *PakRun) Less(i, j int) bool { return p.Files[i].File.Name < p.Files[j].File.Name }
257 func (p *PakRun) Swap(i, j int) { p.Files[i], p.Files[j] = p.Files[j], p.Files[i] }
258
259
260 func lessFileRun(x, y interface{}) bool {
261 return x.(*FileRun).File.Pak.less(y.(*FileRun).File.Pak)
262 }
263
264
265 func newPakRun(h RunList) interface{} {
266 pak := h[0].(*FileRun).File.Pak
267 files := make([]*FileRun, len(h))
268 for i, x := range h {
269 files[i] = x.(*FileRun)
270 }
271 run := &PakRun{pak, files}
272 sort.Sort(run)
273 return run
274 }
275
276
277
278
279
280 type HitList []*PakRun
281
282
283 func lessPakRun(x, y interface{}) bool { return x.(*PakRun).Pak.less(y.(*PakRun).Pak) }
284
285 func reduce(h0 RunList) HitList {
286
287 h1 := h0.reduce(lessSpot, newFileRun)
288
289 h2 := h1.reduce(lessFileRun, newPakRun)
290
291 h2.sort(lessPakRun)
292
293 h := make(HitList, len(h2))
294 for i, p := range h2 {
295 h[i] = p.(*PakRun)
296 }
297 return h
298 }
299
300
301
302 func (h HitList) filter(pakname string) HitList {
303 var hh HitList
304 for _, p := range h {
305 if p.Pak.Name == pakname {
306 hh = append(hh, p)
307 }
308 }
309 return hh
310 }
311
312
313
314
// A wordPair associates a spelling (alt) with its canonical form (canon).
type wordPair struct {
	canon string // canonical spelling (see canonical)
	alt   string // spelling as it was indexed
}

// An AltWords describes the list of alternative spellings that share one
// canonical spelling.
type AltWords struct {
	Canon string   // canonical spelling
	Alts  []string // spellings sharing Canon
}

// lessWordPair is a comparer for *wordPair values; pairs are ordered by
// their canonical spelling only.
func lessWordPair(x, y interface{}) bool {
	cx := x.(*wordPair).canon
	cy := y.(*wordPair).canon
	return cx < cy
}
329
330
331 func newAltWords(h RunList) interface{} {
332 canon := h[0].(*wordPair).canon
333 alts := make([]string, len(h))
334 for i, x := range h {
335 alts[i] = x.(*wordPair).alt
336 }
337 return &AltWords{canon, alts}
338 }
339
340 func (a *AltWords) filter(s string) *AltWords {
341 var alts []string
342 for _, w := range a.Alts {
343 if w != s {
344 alts = append(alts, w)
345 }
346 }
347 if len(alts) > 0 {
348 return &AltWords{a.Canon, alts}
349 }
350 return nil
351 }
352
353
354
// Ident stores information about an identifier recorded by the documentation
// indexer (see Indexer.indexDocs).
type Ident struct {
	Path    string // package path, e.g. "container/heap"
	Package string // package name
	Name    string // identifier name; methods are stored as "Type.Method"
	Doc     string // synopsis of the identifier's documentation
}
361
362
363
364 type byImportCount struct {
365 Idents []Ident
366 ImportCount map[string]int
367 }
368
369 func (ic byImportCount) Len() int {
370 return len(ic.Idents)
371 }
372
373 func (ic byImportCount) Less(i, j int) bool {
374 ri := ic.ImportCount[ic.Idents[i].Path]
375 rj := ic.ImportCount[ic.Idents[j].Path]
376 if ri == rj {
377 return ic.Idents[i].Path < ic.Idents[j].Path
378 }
379 return ri > rj
380 }
381
382 func (ic byImportCount) Swap(i, j int) {
383 ic.Idents[i], ic.Idents[j] = ic.Idents[j], ic.Idents[i]
384 }
385
386 func (ic byImportCount) String() string {
387 buf := bytes.NewBuffer([]byte("["))
388 for _, v := range ic.Idents {
389 buf.WriteString(fmt.Sprintf("\n\t%s, %s (%d)", v.Path, v.Name, ic.ImportCount[v.Path]))
390 }
391 buf.WriteString("\n]")
392 return buf.String()
393 }
394
395
396
397 func (ic byImportCount) filter(pakname string) []Ident {
398 if ic.Idents == nil {
399 return nil
400 }
401 var res []Ident
402 for _, i := range ic.Idents {
403 if i.Package == pakname {
404 res = append(res, i)
405 }
406 }
407 return res
408 }
409
410
411 func (ic byImportCount) top(n int) []Ident {
412 if len(ic.Idents) > n {
413 return ic.Idents[:n]
414 }
415 return ic.Idents
416 }
417
418
419
420
421 type IndexResult struct {
422 Decls RunList
423 Others RunList
424 }
425
426
// Statistics provides statistics information for an index.
type Statistics struct {
	Bytes int // total size of the indexed files
	Files int // number of indexed files
	Lines int // number of lines in the indexed files
	Words int // number of different words
	Spots int // number of identifier occurrences visited
}
434
435
436
437
438
439 type Indexer struct {
440 c *Corpus
441 fset *token.FileSet
442 fsOpenGate chan bool
443
444 mu sync.Mutex
445 sources bytes.Buffer
446 strings map[string]string
447 packages map[Pak]*Pak
448 words map[string]*IndexResult
449 snippets []*Snippet
450 current *token.File
451 file *File
452 decl ast.Decl
453 stats Statistics
454 throttle *util.Throttle
455 importCount map[string]int
456 packagePath map[string]map[string]bool
457 exports map[string]map[string]SpotKind
458 curPkgExports map[string]SpotKind
459 idents map[SpotKind]map[string][]Ident
460 }
461
462 func (x *Indexer) intern(s string) string {
463 if s, ok := x.strings[s]; ok {
464 return s
465 }
466 x.strings[s] = s
467 return s
468 }
469
470 func (x *Indexer) lookupPackage(path, name string) *Pak {
471
472
473
474
475 key := Pak{Path: x.intern(path), Name: x.intern(name)}
476 pak := x.packages[key]
477 if pak == nil {
478 pak = &key
479 x.packages[key] = pak
480 }
481 return pak
482 }
483
484 func (x *Indexer) addSnippet(s *Snippet) int {
485 index := len(x.snippets)
486 x.snippets = append(x.snippets, s)
487 return index
488 }
489
490 func (x *Indexer) visitIdent(kind SpotKind, id *ast.Ident) {
491 if id == nil {
492 return
493 }
494 name := x.intern(id.Name)
495
496 switch kind {
497 case TypeDecl, FuncDecl, ConstDecl, VarDecl:
498 x.curPkgExports[name] = kind
499 }
500
501 lists, found := x.words[name]
502 if !found {
503 lists = new(IndexResult)
504 x.words[name] = lists
505 }
506
507 if kind == Use || x.decl == nil {
508 if x.c.IndexGoCode {
509
510 info := makeSpotInfo(kind, x.current.Line(id.Pos()), false)
511 lists.Others = append(lists.Others, Spot{x.file, info})
512 }
513 } else {
514
515 index := x.addSnippet(NewSnippet(x.fset, x.decl, id))
516 info := makeSpotInfo(kind, index, true)
517 lists.Decls = append(lists.Decls, Spot{x.file, info})
518 }
519
520 x.stats.Spots++
521 }
522
523 func (x *Indexer) visitFieldList(kind SpotKind, flist *ast.FieldList) {
524 for _, f := range flist.List {
525 x.decl = nil
526 for _, name := range f.Names {
527 x.visitIdent(kind, name)
528 }
529 ast.Walk(x, f.Type)
530
531 }
532 }
533
534 func (x *Indexer) visitSpec(kind SpotKind, spec ast.Spec) {
535 switch n := spec.(type) {
536 case *ast.ImportSpec:
537 x.visitIdent(ImportDecl, n.Name)
538 if n.Path != nil {
539 if imp, err := strconv.Unquote(n.Path.Value); err == nil {
540 x.importCount[x.intern(imp)]++
541 }
542 }
543
544 case *ast.ValueSpec:
545 for _, n := range n.Names {
546 x.visitIdent(kind, n)
547 }
548 ast.Walk(x, n.Type)
549 for _, v := range n.Values {
550 ast.Walk(x, v)
551 }
552
553 case *ast.TypeSpec:
554 x.visitIdent(TypeDecl, n.Name)
555 ast.Walk(x, n.Type)
556 }
557 }
558
559 func (x *Indexer) visitGenDecl(decl *ast.GenDecl) {
560 kind := VarDecl
561 if decl.Tok == token.CONST {
562 kind = ConstDecl
563 }
564 x.decl = decl
565 for _, s := range decl.Specs {
566 x.visitSpec(kind, s)
567 }
568 }
569
570 func (x *Indexer) Visit(node ast.Node) ast.Visitor {
571 switch n := node.(type) {
572 case nil:
573
574
575 case *ast.Ident:
576 x.visitIdent(Use, n)
577
578 case *ast.FieldList:
579 x.visitFieldList(VarDecl, n)
580
581 case *ast.InterfaceType:
582 x.visitFieldList(MethodDecl, n.Methods)
583
584 case *ast.DeclStmt:
585
586
587 if decl, ok := n.Decl.(*ast.GenDecl); ok {
588 x.decl = nil
589 x.visitGenDecl(decl)
590 }
591
592 case *ast.GenDecl:
593 x.decl = n
594 x.visitGenDecl(n)
595
596 case *ast.FuncDecl:
597 kind := FuncDecl
598 if n.Recv != nil {
599 kind = MethodDecl
600 ast.Walk(x, n.Recv)
601 }
602 x.decl = n
603 x.visitIdent(kind, n.Name)
604 ast.Walk(x, n.Type)
605 if n.Body != nil {
606 ast.Walk(x, n.Body)
607 }
608
609 case *ast.File:
610 x.decl = nil
611 x.visitIdent(PackageClause, n.Name)
612 for _, d := range n.Decls {
613 ast.Walk(x, d)
614 }
615
616 default:
617 return x
618 }
619
620 return nil
621 }
622
623
624
625
626 func (x *Indexer) addFile(f vfs.ReadSeekCloser, filename string, goFile bool) (file *token.File, ast *ast.File) {
627 defer f.Close()
628
629
630
631
632
633
634
635
636 x.sources.WriteByte(0)
637
638
639
640 base := x.fset.Base()
641 if x.sources.Len() != base {
642 panic("internal error: file base incorrect")
643 }
644
645
646 if _, err := x.sources.ReadFrom(f); err == nil {
647 src := x.sources.Bytes()[base:]
648
649 if goFile {
650
651 if ast, err = parser.ParseFile(x.fset, filename, src, parser.ParseComments); err == nil {
652 file = x.fset.File(ast.Pos())
653 return
654 }
655
656
657
658
659
660 file = x.fset.File(token.Pos(base))
661 file.SetLinesForContent(src)
662 ast = nil
663 return
664 }
665
666 if util.IsText(src) {
667
668 file = x.fset.AddFile(filename, x.fset.Base(), len(src))
669 file.SetLinesForContent(src)
670 return
671 }
672 }
673
674
675 x.sources.Truncate(base - 1)
676 return
677 }
678
679
680
681
682
683
684
685
// whitelisted is the set of file extensions (with leading dot) and complete
// file names that isWhitelisted accepts.
var whitelisted = map[string]bool{
	// file extensions
	".bash": true,
	".c":    true,
	".cc":   true,
	".cpp":  true,
	".cxx":  true,
	".css":  true,
	".go":   true,
	".goc":  true,
	".h":    true,
	".hh":   true,
	".hpp":  true,
	".hxx":  true,
	".html": true,
	".js":   true,
	".out":  true,
	".py":   true,
	".s":    true,
	".sh":   true,
	".txt":  true,
	".xml":  true,

	// complete file names (files without extension)
	"AUTHORS":      true,
	"CONTRIBUTORS": true,
	"LICENSE":      true,
	"Makefile":     true,
	"PATENTS":      true,
	"README":       true,
}

// isWhitelisted reports whether filename is accepted by the whitelisted
// table: files with an extension are looked up by extension, files without
// one by their full name.
func isWhitelisted(filename string) bool {
	if ext := pathpkg.Ext(filename); ext != "" {
		return whitelisted[ext]
	}
	// No extension: the file name itself is the key.
	return whitelisted[filename]
}
726
727 func (x *Indexer) indexDocs(dirname string, filename string, astFile *ast.File) {
728 pkgName := x.intern(astFile.Name.Name)
729 if pkgName == "main" {
730 return
731 }
732 pkgPath := x.intern(strings.TrimPrefix(strings.TrimPrefix(dirname, "/src/"), "pkg/"))
733 astPkg := ast.Package{
734 Name: pkgName,
735 Files: map[string]*ast.File{
736 filename: astFile,
737 },
738 }
739 var m doc.Mode
740 docPkg := doc.New(&astPkg, dirname, m)
741 addIdent := func(sk SpotKind, name string, docstr string) {
742 if x.idents[sk] == nil {
743 x.idents[sk] = make(map[string][]Ident)
744 }
745 name = x.intern(name)
746 x.idents[sk][name] = append(x.idents[sk][name], Ident{
747 Path: pkgPath,
748 Package: pkgName,
749 Name: name,
750 Doc: doc.Synopsis(docstr),
751 })
752 }
753
754 if x.idents[PackageClause] == nil {
755 x.idents[PackageClause] = make(map[string][]Ident)
756 }
757
758
759
760 words := strings.Split(pathpkg.Dir(pkgPath), "/")
761 if words[0] == "." {
762 words = []string{}
763 }
764 name := x.intern(docPkg.Name)
765 synopsis := doc.Synopsis(docPkg.Doc)
766 words = append(words, name)
767 pkgIdent := Ident{
768 Path: pkgPath,
769 Package: pkgName,
770 Name: name,
771 Doc: synopsis,
772 }
773 for _, word := range words {
774 word = x.intern(word)
775 found := false
776 pkgs := x.idents[PackageClause][word]
777 for i, p := range pkgs {
778 if p.Path == pkgPath {
779 if docPkg.Doc != "" {
780 p.Doc = synopsis
781 pkgs[i] = p
782 }
783 found = true
784 break
785 }
786 }
787 if !found {
788 x.idents[PackageClause][word] = append(x.idents[PackageClause][word], pkgIdent)
789 }
790 }
791
792 for _, c := range docPkg.Consts {
793 for _, name := range c.Names {
794 addIdent(ConstDecl, name, c.Doc)
795 }
796 }
797 for _, t := range docPkg.Types {
798 addIdent(TypeDecl, t.Name, t.Doc)
799 for _, c := range t.Consts {
800 for _, name := range c.Names {
801 addIdent(ConstDecl, name, c.Doc)
802 }
803 }
804 for _, v := range t.Vars {
805 for _, name := range v.Names {
806 addIdent(VarDecl, name, v.Doc)
807 }
808 }
809 for _, f := range t.Funcs {
810 addIdent(FuncDecl, f.Name, f.Doc)
811 }
812 for _, f := range t.Methods {
813 addIdent(MethodDecl, f.Name, f.Doc)
814
815
816 idents := x.idents[MethodDecl][f.Name]
817 idents[len(idents)-1].Name = x.intern(t.Name + "." + f.Name)
818 }
819 }
820 for _, v := range docPkg.Vars {
821 for _, name := range v.Names {
822 addIdent(VarDecl, name, v.Doc)
823 }
824 }
825 for _, f := range docPkg.Funcs {
826 addIdent(FuncDecl, f.Name, f.Doc)
827 }
828 }
829
830 func (x *Indexer) indexGoFile(dirname string, filename string, file *token.File, astFile *ast.File) {
831 pkgName := astFile.Name.Name
832
833 if x.c.IndexGoCode {
834 x.current = file
835 pak := x.lookupPackage(dirname, pkgName)
836 x.file = &File{filename, pak}
837 ast.Walk(x, astFile)
838 }
839
840 if x.c.IndexDocs {
841
842
843 isTestFile := (x.c.IndexGoCode || x.c.IndexFullText) &&
844 (strings.HasSuffix(filename, "_test.go") || strings.HasPrefix(dirname, "/test/"))
845 if !isTestFile {
846 x.indexDocs(dirname, filename, astFile)
847 }
848 }
849
850 ppKey := x.intern(pkgName)
851 if _, ok := x.packagePath[ppKey]; !ok {
852 x.packagePath[ppKey] = make(map[string]bool)
853 }
854 pkgPath := x.intern(strings.TrimPrefix(strings.TrimPrefix(dirname, "/src/"), "pkg/"))
855 x.packagePath[ppKey][pkgPath] = true
856
857
858
859 if len(x.curPkgExports) > 0 {
860 dest, ok := x.exports[pkgPath]
861 if !ok {
862 dest = make(map[string]SpotKind)
863 x.exports[pkgPath] = dest
864 }
865 for k, v := range x.curPkgExports {
866 dest[k] = v
867 }
868 }
869 }
870
871 func (x *Indexer) visitFile(dirname string, fi os.FileInfo) {
872 if fi.IsDir() || !x.c.IndexEnabled {
873 return
874 }
875
876 filename := pathpkg.Join(dirname, fi.Name())
877 goFile := isGoFile(fi)
878
879 switch {
880 case x.c.IndexFullText:
881 if !isWhitelisted(fi.Name()) {
882 return
883 }
884 case x.c.IndexGoCode:
885 if !goFile {
886 return
887 }
888 case x.c.IndexDocs:
889 if !goFile ||
890 strings.HasSuffix(fi.Name(), "_test.go") ||
891 strings.HasPrefix(dirname, "/test/") {
892 return
893 }
894 default:
895
896 return
897 }
898
899 x.fsOpenGate <- true
900 defer func() { <-x.fsOpenGate }()
901
902
903 f, err := x.c.fs.Open(filename)
904 if err != nil {
905 return
906 }
907
908 x.mu.Lock()
909 defer x.mu.Unlock()
910
911 x.throttle.Throttle()
912
913 x.curPkgExports = make(map[string]SpotKind)
914 file, fast := x.addFile(f, filename, goFile)
915 if file == nil {
916 return
917 }
918
919 if fast != nil {
920 x.indexGoFile(dirname, fi.Name(), file, fast)
921 }
922
923
924 x.stats.Bytes += file.Size()
925 x.stats.Files++
926 x.stats.Lines += file.LineCount()
927 }
928
929
// indexOptions records the corpus settings an index was built with. The
// options are stored with a serialized index and compared against a corpus
// by Index.CompatibleWith.
type indexOptions struct {
	// Docs is the corpus's IndexDocs setting.
	Docs bool

	// GoCode is the corpus's IndexGoCode setting.
	GoCode bool

	// FullText is the corpus's IndexFullText setting.
	FullText bool

	// MaxResults is the corpus's MaxResults setting.
	MaxResults int
}
950
951
952
953
954 type LookupResult struct {
955 Decls HitList
956 Others HitList
957 }
958
959 type Index struct {
960 fset *token.FileSet
961 suffixes *suffixarray.Index
962 words map[string]*LookupResult
963 alts map[string]*AltWords
964 snippets []*Snippet
965 stats Statistics
966 importCount map[string]int
967 packagePath map[string]map[string]bool
968 exports map[string]map[string]SpotKind
969 idents map[SpotKind]map[string][]Ident
970 opts indexOptions
971 }
972
// canonical returns the canonical spelling of w, which is its lower-case
// form.
func canonical(w string) string {
	return strings.ToLower(w)
}
974
975
976
977
// Bounds on the resources NewIndex uses concurrently.
const (
	// maxOpenFiles is the capacity of the gate limiting concurrently
	// open files.
	maxOpenFiles = 200

	// maxOpenDirs is the capacity of the gate limiting concurrently
	// read directories.
	maxOpenDirs = 50
)
982
983 func (c *Corpus) throttle() float64 {
984 if c.IndexThrottle <= 0 {
985 return 0.9
986 }
987 if c.IndexThrottle > 1.0 {
988 return 1.0
989 }
990 return c.IndexThrottle
991 }
992
993
994 func (c *Corpus) NewIndex() *Index {
995
996
997 x := &Indexer{
998 c: c,
999 fset: token.NewFileSet(),
1000 fsOpenGate: make(chan bool, maxOpenFiles),
1001 strings: make(map[string]string),
1002 packages: make(map[Pak]*Pak, 256),
1003 words: make(map[string]*IndexResult, 8192),
1004 throttle: util.NewThrottle(c.throttle(), 100*time.Millisecond),
1005 importCount: make(map[string]int),
1006 packagePath: make(map[string]map[string]bool),
1007 exports: make(map[string]map[string]SpotKind),
1008 idents: make(map[SpotKind]map[string][]Ident, 4),
1009 }
1010
1011
1012 var wg sync.WaitGroup
1013 dirGate := make(chan bool, maxOpenDirs)
1014 for dirname := range c.fsDirnames() {
1015 if c.IndexDirectory != nil && !c.IndexDirectory(dirname) {
1016 continue
1017 }
1018 dirGate <- true
1019 wg.Add(1)
1020 go func(dirname string) {
1021 defer func() { <-dirGate }()
1022 defer wg.Done()
1023
1024 list, err := c.fs.ReadDir(dirname)
1025 if err != nil {
1026 log.Printf("ReadDir(%q): %v; skipping directory", dirname, err)
1027 return
1028 }
1029 for _, fi := range list {
1030 wg.Add(1)
1031 go func(fi os.FileInfo) {
1032 defer wg.Done()
1033 x.visitFile(dirname, fi)
1034 }(fi)
1035 }
1036 }(dirname)
1037 }
1038 wg.Wait()
1039
1040 if !c.IndexFullText {
1041
1042
1043
1044 x.fset = nil
1045 x.sources.Reset()
1046 x.current = nil
1047 }
1048
1049
1050
1051
1052 words := make(map[string]*LookupResult)
1053 var wlist RunList
1054 for w, h := range x.words {
1055 decls := reduce(h.Decls)
1056 others := reduce(h.Others)
1057 words[w] = &LookupResult{
1058 Decls: decls,
1059 Others: others,
1060 }
1061 wlist = append(wlist, &wordPair{canonical(w), w})
1062 x.throttle.Throttle()
1063 }
1064 x.stats.Words = len(words)
1065
1066
1067
1068 alist := wlist.reduce(lessWordPair, newAltWords)
1069
1070
1071 alts := make(map[string]*AltWords)
1072 for i := 0; i < len(alist); i++ {
1073 a := alist[i].(*AltWords)
1074 alts[a.Canon] = a
1075 }
1076
1077
1078 var suffixes *suffixarray.Index
1079 if c.IndexFullText {
1080 suffixes = suffixarray.New(x.sources.Bytes())
1081 }
1082
1083
1084 for _, idMap := range x.idents {
1085 for _, ir := range idMap {
1086 sort.Sort(byImportCount{ir, x.importCount})
1087 }
1088 }
1089
1090 return &Index{
1091 fset: x.fset,
1092 suffixes: suffixes,
1093 words: words,
1094 alts: alts,
1095 snippets: x.snippets,
1096 stats: x.stats,
1097 importCount: x.importCount,
1098 packagePath: x.packagePath,
1099 exports: x.exports,
1100 idents: x.idents,
1101 opts: indexOptions{
1102 Docs: x.c.IndexDocs,
1103 GoCode: x.c.IndexGoCode,
1104 FullText: x.c.IndexFullText,
1105 MaxResults: x.c.MaxResults,
1106 },
1107 }
1108 }
1109
// ErrFileIndexVersion is returned by Index.ReadFrom when the serialized
// index was written with a different fileIndexVersion.
var ErrFileIndexVersion = errors.New("file index version out of date")

// fileIndexVersion is the version stored in, and expected of, a serialized
// index.
const fileIndexVersion = 3
1113
1114
1115
1116 type fileIndex struct {
1117 Version int
1118 Words map[string]*LookupResult
1119 Alts map[string]*AltWords
1120 Snippets []*Snippet
1121 Fulltext bool
1122 Stats Statistics
1123 ImportCount map[string]int
1124 PackagePath map[string]map[string]bool
1125 Exports map[string]map[string]SpotKind
1126 Idents map[SpotKind]map[string][]Ident
1127 Opts indexOptions
1128 }
1129
1130 func (x *fileIndex) Write(w io.Writer) error {
1131 return gob.NewEncoder(w).Encode(x)
1132 }
1133
1134 func (x *fileIndex) Read(r io.Reader) error {
1135 return gob.NewDecoder(r).Decode(x)
1136 }
1137
1138
1139 func (x *Index) WriteTo(w io.Writer) (n int64, err error) {
1140 w = countingWriter{&n, w}
1141 fulltext := false
1142 if x.suffixes != nil {
1143 fulltext = true
1144 }
1145 fx := fileIndex{
1146 Version: fileIndexVersion,
1147 Words: x.words,
1148 Alts: x.alts,
1149 Snippets: x.snippets,
1150 Fulltext: fulltext,
1151 Stats: x.stats,
1152 ImportCount: x.importCount,
1153 PackagePath: x.packagePath,
1154 Exports: x.exports,
1155 Idents: x.idents,
1156 Opts: x.opts,
1157 }
1158 if err := fx.Write(w); err != nil {
1159 return 0, err
1160 }
1161 if fulltext {
1162 encode := func(x interface{}) error {
1163 return gob.NewEncoder(w).Encode(x)
1164 }
1165 if err := x.fset.Write(encode); err != nil {
1166 return 0, err
1167 }
1168 if err := x.suffixes.Write(w); err != nil {
1169 return 0, err
1170 }
1171 }
1172 return n, nil
1173 }
1174
1175
1176
1177
1178 func (x *Index) ReadFrom(r io.Reader) (n int64, err error) {
1179
1180 if _, ok := r.(io.ByteReader); !ok {
1181 r = bufio.NewReader(r)
1182 }
1183 r = countingReader{&n, r.(byteReader)}
1184 var fx fileIndex
1185 if err := fx.Read(r); err != nil {
1186 return n, err
1187 }
1188 if fx.Version != fileIndexVersion {
1189 return 0, ErrFileIndexVersion
1190 }
1191 x.words = fx.Words
1192 x.alts = fx.Alts
1193 x.snippets = fx.Snippets
1194 x.stats = fx.Stats
1195 x.importCount = fx.ImportCount
1196 x.packagePath = fx.PackagePath
1197 x.exports = fx.Exports
1198 x.idents = fx.Idents
1199 x.opts = fx.Opts
1200 if fx.Fulltext {
1201 x.fset = token.NewFileSet()
1202 decode := func(x interface{}) error {
1203 return gob.NewDecoder(r).Decode(x)
1204 }
1205 if err := x.fset.Read(decode); err != nil {
1206 return n, err
1207 }
1208 x.suffixes = new(suffixarray.Index)
1209 if err := x.suffixes.Read(r); err != nil {
1210 return n, err
1211 }
1212 }
1213 return n, nil
1214 }
1215
1216
1217 func (x *Index) Stats() Statistics {
1218 return x.stats
1219 }
1220
1221
1222 func (x *Index) ImportCount() map[string]int {
1223 return x.importCount
1224 }
1225
1226
1227
1228 func (x *Index) PackagePath() map[string]map[string]bool {
1229 return x.packagePath
1230 }
1231
1232
1233
1234 func (x *Index) Exports() map[string]map[string]SpotKind {
1235 return x.exports
1236 }
1237
1238
1239
1240 func (x *Index) Idents() map[SpotKind]map[string][]Ident {
1241 return x.idents
1242 }
1243
1244 func (x *Index) lookupWord(w string) (match *LookupResult, alt *AltWords) {
1245 match = x.words[w]
1246 alt = x.alts[canonical(w)]
1247
1248
1249
1250 if match != nil && alt != nil {
1251 alt = alt.filter(w)
1252 }
1253 return
1254 }
1255
1256
// isIdentifier reports whether s is a Go identifier: a non-empty sequence
// of letters, underscores and digits that does not start with a digit.
func isIdentifier(s string) bool {
	if s == "" {
		return false
	}
	for i, ch := range s {
		switch {
		case ch == '_', unicode.IsLetter(ch):
			// allowed anywhere
		case i > 0 && unicode.IsDigit(ch):
			// digits are allowed after the first character
		default:
			return false
		}
	}
	return true
}
1266
1267
1268
1269
1270
1271 func (x *Index) Lookup(query string) (*SearchResult, error) {
1272 ss := strings.Split(query, ".")
1273
1274
1275 for _, s := range ss {
1276 if !isIdentifier(s) {
1277 return nil, errors.New("all query parts must be identifiers")
1278 }
1279 }
1280 rslt := &SearchResult{
1281 Query: query,
1282 Idents: make(map[SpotKind][]Ident, 5),
1283 }
1284
1285 switch len(ss) {
1286 case 1:
1287 ident := ss[0]
1288 rslt.Hit, rslt.Alt = x.lookupWord(ident)
1289 if rslt.Hit != nil {
1290
1291
1292 rslt.Pak = rslt.Hit.Others.filter(ident)
1293 }
1294 for k, v := range x.idents {
1295 const rsltLimit = 50
1296 ids := byImportCount{v[ident], x.importCount}
1297 rslt.Idents[k] = ids.top(rsltLimit)
1298 }
1299
1300 case 2:
1301 pakname, ident := ss[0], ss[1]
1302 rslt.Hit, rslt.Alt = x.lookupWord(ident)
1303 if rslt.Hit != nil {
1304
1305
1306 decls := rslt.Hit.Decls.filter(pakname)
1307 others := rslt.Hit.Others.filter(pakname)
1308 rslt.Hit = &LookupResult{decls, others}
1309 }
1310 for k, v := range x.idents {
1311 ids := byImportCount{v[ident], x.importCount}
1312 rslt.Idents[k] = ids.filter(pakname)
1313 }
1314
1315 default:
1316 return nil, errors.New("query is not a (qualified) identifier")
1317 }
1318
1319 return rslt, nil
1320 }
1321
1322 func (x *Index) Snippet(i int) *Snippet {
1323
1324 if 0 <= i && i < len(x.snippets) {
1325 return x.snippets[i]
1326 }
1327 return nil
1328 }
1329
// A positionList is a list of (file name, line) pairs that sorts by file
// name only.
type positionList []struct {
	filename string
	line     int
}

// Len returns the number of positions; part of sort.Interface.
func (list positionList) Len() int {
	return len(list)
}

// Less orders positions by file name; part of sort.Interface.
func (list positionList) Less(i, j int) bool {
	fi, fj := list[i].filename, list[j].filename
	return fi < fj
}

// Swap exchanges positions i and j; part of sort.Interface.
func (list positionList) Swap(i, j int) {
	list[j], list[i] = list[i], list[j]
}
1338
1339
// unique sorts list in place and returns its leading part with duplicate
// values removed. The result shares its backing array with list.
func unique(list []int) []int {
	sort.Ints(list)
	out := list[:0]
	for _, v := range list {
		if n := len(out); n == 0 || out[n-1] != v {
			out = append(out, v)
		}
	}
	return out
}
1353
1354
// A FileLines value specifies a file and line numbers within that file.
type FileLines struct {
	Filename string
	Lines    []int // sorted, without duplicates (as produced by LookupRegexp)
}
1359
1360
1361
1362
1363 func (x *Index) LookupRegexp(r *regexp.Regexp, n int) (found int, result []FileLines) {
1364 if x.suffixes == nil || n <= 0 {
1365 return
1366 }
1367
1368
1369 var list positionList
1370
1371
1372
1373
1374
1375
1376 for n1 := n; found < n; n1 += n - found {
1377 found = 0
1378 matches := x.suffixes.FindAllIndex(r, n1)
1379
1380
1381 list = make(positionList, len(matches))
1382 for _, m := range matches {
1383
1384
1385 p := token.Pos(m[0])
1386 if file := x.fset.File(p); file != nil {
1387 if base := file.Base(); base <= m[1] && m[1] <= base+file.Size() {
1388
1389 list[found].filename = file.Name()
1390 list[found].line = file.Line(p)
1391 found++
1392 }
1393 }
1394 }
1395 if found == n || len(matches) < n1 {
1396
1397 break
1398 }
1399 }
1400 list = list[0:found]
1401 sort.Sort(list)
1402
1403
1404 var last string
1405 var lines []int
1406 addLines := func() {
1407 if len(lines) > 0 {
1408
1409 result = append(result, FileLines{last, unique(lines)})
1410 lines = nil
1411 }
1412 }
1413 for _, m := range list {
1414 if m.filename != last {
1415 addLines()
1416 last = m.filename
1417 }
1418 lines = append(lines, m.line)
1419 }
1420 addLines()
1421
1422 return
1423 }
1424
1425
1426
1427 func (c *Corpus) invalidateIndex() {
1428 c.fsModified.Set(nil)
1429 c.refreshMetadata()
1430 }
1431
1432
1433
1434 func (c *Corpus) feedDirnames(ch chan<- string) {
1435 if dir, _ := c.fsTree.Get(); dir != nil {
1436 for d := range dir.(*Directory).iter(false) {
1437 ch <- d.Path
1438 }
1439 }
1440 }
1441
1442
1443
1444 func (c *Corpus) fsDirnames() <-chan string {
1445 ch := make(chan string, 256)
1446 go func() {
1447 c.feedDirnames(ch)
1448 close(ch)
1449 }()
1450 return ch
1451 }
1452
1453
1454
1455 func (x *Index) CompatibleWith(c *Corpus) bool {
1456 return x.opts.Docs == c.IndexDocs &&
1457 x.opts.GoCode == c.IndexGoCode &&
1458 x.opts.FullText == c.IndexFullText &&
1459 x.opts.MaxResults == c.MaxResults
1460 }
1461
1462 func (c *Corpus) readIndex(filenames string) error {
1463 matches, err := filepath.Glob(filenames)
1464 if err != nil {
1465 return err
1466 } else if matches == nil {
1467 return fmt.Errorf("no index files match %q", filenames)
1468 }
1469 sort.Strings(matches)
1470 files := make([]io.Reader, 0, len(matches))
1471 for _, filename := range matches {
1472 f, err := os.Open(filename)
1473 if err != nil {
1474 return err
1475 }
1476 defer f.Close()
1477 files = append(files, f)
1478 }
1479 return c.ReadIndexFrom(io.MultiReader(files...))
1480 }
1481
1482
1483 func (c *Corpus) ReadIndexFrom(r io.Reader) error {
1484 x := new(Index)
1485 if _, err := x.ReadFrom(r); err != nil {
1486 return err
1487 }
1488 if !x.CompatibleWith(c) {
1489 return fmt.Errorf("index file options are incompatible: %v", x.opts)
1490 }
1491 c.searchIndex.Set(x)
1492 return nil
1493 }
1494
1495 func (c *Corpus) UpdateIndex() {
1496 if c.Verbose {
1497 log.Printf("updating index...")
1498 }
1499 start := time.Now()
1500 index := c.NewIndex()
1501 stop := time.Now()
1502 c.searchIndex.Set(index)
1503 if c.Verbose {
1504 secs := stop.Sub(start).Seconds()
1505 stats := index.Stats()
1506 log.Printf("index updated (%gs, %d bytes of source, %d files, %d lines, %d unique words, %d spots)",
1507 secs, stats.Bytes, stats.Files, stats.Lines, stats.Words, stats.Spots)
1508 }
1509 memstats := new(runtime.MemStats)
1510 runtime.ReadMemStats(memstats)
1511 if c.Verbose {
1512 log.Printf("before GC: bytes = %d footprint = %d", memstats.HeapAlloc, memstats.Sys)
1513 }
1514 runtime.GC()
1515 runtime.ReadMemStats(memstats)
1516 if c.Verbose {
1517 log.Printf("after GC: bytes = %d footprint = %d", memstats.HeapAlloc, memstats.Sys)
1518 }
1519 }
1520
1521
1522 func (c *Corpus) RunIndexer() {
1523
1524 if c.IndexFiles != "" {
1525 c.initFSTree()
1526 if err := c.readIndex(c.IndexFiles); err != nil {
1527 log.Printf("error reading index from file %s: %v", c.IndexFiles, err)
1528 }
1529 return
1530 }
1531
1532
1533 for {
1534 c.initFSTree()
1535 c.UpdateIndex()
1536 if c.IndexInterval < 0 {
1537 return
1538 }
1539 delay := 5 * time.Minute
1540 if c.IndexInterval > 0 {
1541 delay = c.IndexInterval
1542 }
1543 time.Sleep(delay)
1544 }
1545 }
1546
// countingWriter is an io.Writer that forwards to w and adds the number of
// bytes written to *n.
type countingWriter struct {
	n *int64
	w io.Writer
}

// Write writes p to the underlying writer and adds the number of bytes it
// reports as written to the counter, whether or not the write failed.
func (c countingWriter) Write(p []byte) (n int, err error) {
	written, werr := c.w.Write(p)
	*c.n += int64(written)
	return written, werr
}
1557
// A byteReader can read both byte slices and single bytes.
type byteReader interface {
	io.Reader
	io.ByteReader
}

// countingReader is a byteReader that forwards to r and adds the number of
// bytes read to *n.
type countingReader struct {
	n *int64
	r byteReader
}

// Read reads into p from the underlying reader and adds the number of bytes
// it reports as read to the counter, whether or not the read failed.
func (c countingReader) Read(p []byte) (n int, err error) {
	n, err = c.r.Read(p)
	*c.n += int64(n)
	return n, err
}

// ReadByte reads one byte from the underlying reader. The counter is
// advanced only if a byte was actually delivered: a failed ReadByte (for
// instance at the end of the input) consumes no input and is not counted.
func (c countingReader) ReadByte() (b byte, err error) {
	b, err = c.r.ReadByte()
	if err == nil {
		*c.n++
	}
	return b, err
}
1579
View as plain text