...

Source file src/golang.org/x/tools/go/callgraph/rta/rta.go

Documentation: golang.org/x/tools/go/callgraph/rta

     1  // Copyright 2013 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // This package provides Rapid Type Analysis (RTA) for Go, a fast
     6  // algorithm for call graph construction and discovery of reachable code
     7  // (and hence dead code) and runtime types.  The algorithm was first
     8  // described in:
     9  //
    10  // David F. Bacon and Peter F. Sweeney. 1996.
    11  // Fast static analysis of C++ virtual function calls. (OOPSLA '96)
    12  // http://doi.acm.org/10.1145/236337.236371
    13  //
    14  // The algorithm uses dynamic programming to tabulate the cross-product
    15  // of the set of known "address taken" functions with the set of known
    16  // dynamic calls of the same type.  As each new address-taken function
    17  // is discovered, call graph edges are added from each known callsite,
    18  // and as each new call site is discovered, call graph edges are added
    19  // from it to each known address-taken function.
    20  //
    21  // A similar approach is used for dynamic calls via interfaces: it
    22  // tabulates the cross-product of the set of known "runtime types",
    23  // i.e. types that may appear in an interface value, or be derived from
    24  // one via reflection, with the set of known "invoke"-mode dynamic
    25  // calls.  As each new "runtime type" is discovered, call edges are
    26  // added from the known call sites, and as each new call site is
    27  // discovered, call graph edges are added to each compatible
    28  // method.
    29  //
    30  // In addition, we must consider all exported methods of any runtime type
    31  // as reachable, since they may be called via reflection.
    32  //
    33  // Each time a newly added call edge causes a new function to become
    34  // reachable, the code of that function is analyzed for more call sites,
    35  // address-taken functions, and runtime types.  The process continues
    36  // until a fixed point is achieved.
    37  //
    38  // The resulting call graph is less precise than one produced by pointer
    39  // analysis, but the algorithm is much faster.  For example, running the
    40  // cmd/callgraph tool on its own source takes ~2.1s for RTA and ~5.4s
    41  // for points-to analysis.
    42  package rta // import "golang.org/x/tools/go/callgraph/rta"
    43  
    44  // TODO(adonovan): test it by connecting it to the interpreter and
    45  // replacing all "unreachable" functions by a special intrinsic, and
    46  // ensure that that intrinsic is never called.
    47  
    48  // TODO(zpavlinovic): decide if the clients must use ssa.InstantiateGenerics
    49  // mode when building programs with generics. It might be possible to
    50  // extend rta to accurately support generics with just ssa.BuilderMode(0).
    51  
    52  import (
    53  	"fmt"
    54  	"go/types"
    55  
    56  	"golang.org/x/tools/go/callgraph"
    57  	"golang.org/x/tools/go/ssa"
    58  	"golang.org/x/tools/go/types/typeutil"
    59  )
    60  
    61  // A Result holds the results of Rapid Type Analysis, which includes the
    62  // set of reachable functions/methods, runtime types, and the call graph.
    63  type Result struct {
    64  	// CallGraph is the discovered callgraph.
    65  	// It does not include edges for calls made via reflection.
    66  	CallGraph *callgraph.Graph
    67  
    68  	// Reachable contains the set of reachable functions and methods.
    69  	// This includes exported methods of runtime types, since
    70  	// they may be accessed via reflection.
    71  	// The value indicates whether the function is address-taken.
    72  	//
    73  	// (We wrap the bool in a struct to avoid inadvertent use of
    74  	// "if Reachable[f] {" to test for set membership.)
    75  	Reachable map[*ssa.Function]struct{ AddrTaken bool }
    76  
    77  	// RuntimeTypes contains the set of types that are needed at
    78  	// runtime, for interfaces or reflection.
    79  	//
    80  	// The value indicates whether the type is inaccessible to reflection.
    81  	// Consider:
    82  	// 	type A struct{B}
    83  	// 	fmt.Println(new(A))
    84  	// Types *A, A and B are accessible to reflection, but the unnamed
    85  	// type struct{B} is not.
    86  	RuntimeTypes typeutil.Map
    87  }
    88  
    89  // Working state of the RTA algorithm.
    90  type rta struct {
    91  	result *Result
    92  
    93  	prog *ssa.Program
    94  
    95  	worklist []*ssa.Function // list of functions to visit
    96  
    97  	// addrTakenFuncsBySig contains all address-taken *Functions, grouped by signature.
    98  	// Keys are *types.Signature, values are map[*ssa.Function]bool sets.
    99  	addrTakenFuncsBySig typeutil.Map
   100  
   101  	// dynCallSites contains all dynamic "call"-mode call sites, grouped by signature.
   102  	// Keys are *types.Signature, values are unordered []ssa.CallInstruction.
   103  	dynCallSites typeutil.Map
   104  
   105  	// invokeSites contains all "invoke"-mode call sites, grouped by interface.
   106  	// Keys are *types.Interface (never *types.Named),
   107  	// Values are unordered []ssa.CallInstruction sets.
   108  	invokeSites typeutil.Map
   109  
   110  	// The following two maps together define the subset of the
   111  	// m:n "implements" relation needed by the algorithm.
   112  
   113  	// concreteTypes maps each concrete type to the set of interfaces that it implements.
   114  	// Keys are types.Type, values are unordered []*types.Interface.
   115  	// Only concrete types used as MakeInterface operands are included.
   116  	concreteTypes typeutil.Map
   117  
   118  	// interfaceTypes maps each interface type to
   119  	// the set of concrete types that implement it.
   120  	// Keys are *types.Interface, values are unordered []types.Type.
   121  	// Only interfaces used in "invoke"-mode CallInstructions are included.
   122  	interfaceTypes typeutil.Map
   123  }
   124  
   125  // addReachable marks a function as potentially callable at run-time,
   126  // and ensures that it gets processed.
   127  func (r *rta) addReachable(f *ssa.Function, addrTaken bool) {
   128  	reachable := r.result.Reachable
   129  	n := len(reachable)
   130  	v := reachable[f]
   131  	if addrTaken {
   132  		v.AddrTaken = true
   133  	}
   134  	reachable[f] = v
   135  	if len(reachable) > n {
   136  		// First time seeing f.  Add it to the worklist.
   137  		r.worklist = append(r.worklist, f)
   138  	}
   139  }
   140  
   141  // addEdge adds the specified call graph edge, and marks it reachable.
   142  // addrTaken indicates whether to mark the callee as "address-taken".
   143  func (r *rta) addEdge(site ssa.CallInstruction, callee *ssa.Function, addrTaken bool) {
   144  	r.addReachable(callee, addrTaken)
   145  
   146  	if g := r.result.CallGraph; g != nil {
   147  		if site.Parent() == nil {
   148  			panic(site)
   149  		}
   150  		from := g.CreateNode(site.Parent())
   151  		to := g.CreateNode(callee)
   152  		callgraph.AddEdge(from, site, to)
   153  	}
   154  }
   155  
   156  // ---------- addrTakenFuncs × dynCallSites ----------
   157  
   158  // visitAddrTakenFunc is called each time we encounter an address-taken function f.
   159  func (r *rta) visitAddrTakenFunc(f *ssa.Function) {
   160  	// Create two-level map (Signature -> Function -> bool).
   161  	S := f.Signature
   162  	funcs, _ := r.addrTakenFuncsBySig.At(S).(map[*ssa.Function]bool)
   163  	if funcs == nil {
   164  		funcs = make(map[*ssa.Function]bool)
   165  		r.addrTakenFuncsBySig.Set(S, funcs)
   166  	}
   167  	if !funcs[f] {
   168  		// First time seeing f.
   169  		funcs[f] = true
   170  
   171  		// If we've seen any dyncalls of this type, mark it reachable,
   172  		// and add call graph edges.
   173  		sites, _ := r.dynCallSites.At(S).([]ssa.CallInstruction)
   174  		for _, site := range sites {
   175  			r.addEdge(site, f, true)
   176  		}
   177  	}
   178  }
   179  
   180  // visitDynCall is called each time we encounter a dynamic "call"-mode call.
   181  func (r *rta) visitDynCall(site ssa.CallInstruction) {
   182  	S := site.Common().Signature()
   183  
   184  	// Record the call site.
   185  	sites, _ := r.dynCallSites.At(S).([]ssa.CallInstruction)
   186  	r.dynCallSites.Set(S, append(sites, site))
   187  
   188  	// For each function of signature S that we know is address-taken,
   189  	// add an edge and mark it reachable.
   190  	funcs, _ := r.addrTakenFuncsBySig.At(S).(map[*ssa.Function]bool)
   191  	for g := range funcs {
   192  		r.addEdge(site, g, true)
   193  	}
   194  }
   195  
   196  // ---------- concrete types × invoke sites ----------
   197  
   198  // addInvokeEdge is called for each new pair (site, C) in the matrix.
   199  func (r *rta) addInvokeEdge(site ssa.CallInstruction, C types.Type) {
   200  	// Ascertain the concrete method of C to be called.
   201  	imethod := site.Common().Method
   202  	cmethod := r.prog.MethodValue(r.prog.MethodSets.MethodSet(C).Lookup(imethod.Pkg(), imethod.Name()))
   203  	r.addEdge(site, cmethod, true)
   204  }
   205  
   206  // visitInvoke is called each time the algorithm encounters an "invoke"-mode call.
   207  func (r *rta) visitInvoke(site ssa.CallInstruction) {
   208  	I := site.Common().Value.Type().Underlying().(*types.Interface)
   209  
   210  	// Record the invoke site.
   211  	sites, _ := r.invokeSites.At(I).([]ssa.CallInstruction)
   212  	r.invokeSites.Set(I, append(sites, site))
   213  
   214  	// Add callgraph edge for each existing
   215  	// address-taken concrete type implementing I.
   216  	for _, C := range r.implementations(I) {
   217  		r.addInvokeEdge(site, C)
   218  	}
   219  }
   220  
   221  // ---------- main algorithm ----------
   222  
   223  // visitFunc processes function f.
   224  func (r *rta) visitFunc(f *ssa.Function) {
   225  	var space [32]*ssa.Value // preallocate space for common case
   226  
   227  	for _, b := range f.Blocks {
   228  		for _, instr := range b.Instrs {
   229  			rands := instr.Operands(space[:0])
   230  
   231  			switch instr := instr.(type) {
   232  			case ssa.CallInstruction:
   233  				call := instr.Common()
   234  				if call.IsInvoke() {
   235  					r.visitInvoke(instr)
   236  				} else if g := call.StaticCallee(); g != nil {
   237  					r.addEdge(instr, g, false)
   238  				} else if _, ok := call.Value.(*ssa.Builtin); !ok {
   239  					r.visitDynCall(instr)
   240  				}
   241  
   242  				// Ignore the call-position operand when
   243  				// looking for address-taken Functions.
   244  				// Hack: assume this is rands[0].
   245  				rands = rands[1:]
   246  
   247  			case *ssa.MakeInterface:
   248  				r.addRuntimeType(instr.X.Type(), false)
   249  			}
   250  
   251  			// Process all address-taken functions.
   252  			for _, op := range rands {
   253  				if g, ok := (*op).(*ssa.Function); ok {
   254  					r.visitAddrTakenFunc(g)
   255  				}
   256  			}
   257  		}
   258  	}
   259  }
   260  
   261  // Analyze performs Rapid Type Analysis, starting at the specified root
   262  // functions.  It returns nil if no roots were specified.
   263  //
   264  // If buildCallGraph is true, Result.CallGraph will contain a call
   265  // graph; otherwise, only the other fields (reachable functions) are
   266  // populated.
   267  func Analyze(roots []*ssa.Function, buildCallGraph bool) *Result {
   268  	if len(roots) == 0 {
   269  		return nil
   270  	}
   271  
   272  	r := &rta{
   273  		result: &Result{Reachable: make(map[*ssa.Function]struct{ AddrTaken bool })},
   274  		prog:   roots[0].Prog,
   275  	}
   276  
   277  	if buildCallGraph {
   278  		// TODO(adonovan): change callgraph API to eliminate the
   279  		// notion of a distinguished root node.  Some callgraphs
   280  		// have many roots, or none.
   281  		r.result.CallGraph = callgraph.New(roots[0])
   282  	}
   283  
   284  	hasher := typeutil.MakeHasher()
   285  	r.result.RuntimeTypes.SetHasher(hasher)
   286  	r.addrTakenFuncsBySig.SetHasher(hasher)
   287  	r.dynCallSites.SetHasher(hasher)
   288  	r.invokeSites.SetHasher(hasher)
   289  	r.concreteTypes.SetHasher(hasher)
   290  	r.interfaceTypes.SetHasher(hasher)
   291  
   292  	// Visit functions, processing their instructions, and adding
   293  	// new functions to the worklist, until a fixed point is
   294  	// reached.
   295  	var shadow []*ssa.Function // for efficiency, we double-buffer the worklist
   296  	r.worklist = append(r.worklist, roots...)
   297  	for len(r.worklist) > 0 {
   298  		shadow, r.worklist = r.worklist, shadow[:0]
   299  		for _, f := range shadow {
   300  			r.visitFunc(f)
   301  		}
   302  	}
   303  	return r.result
   304  }
   305  
   306  // interfaces(C) returns all currently known interfaces implemented by C.
   307  func (r *rta) interfaces(C types.Type) []*types.Interface {
   308  	// Ascertain set of interfaces C implements
   309  	// and update 'implements' relation.
   310  	var ifaces []*types.Interface
   311  	r.interfaceTypes.Iterate(func(I types.Type, concs interface{}) {
   312  		if I := I.(*types.Interface); types.Implements(C, I) {
   313  			concs, _ := concs.([]types.Type)
   314  			r.interfaceTypes.Set(I, append(concs, C))
   315  			ifaces = append(ifaces, I)
   316  		}
   317  	})
   318  	r.concreteTypes.Set(C, ifaces)
   319  	return ifaces
   320  }
   321  
   322  // implementations(I) returns all currently known concrete types that implement I.
   323  func (r *rta) implementations(I *types.Interface) []types.Type {
   324  	var concs []types.Type
   325  	if v := r.interfaceTypes.At(I); v != nil {
   326  		concs = v.([]types.Type)
   327  	} else {
   328  		// First time seeing this interface.
   329  		// Update the 'implements' relation.
   330  		r.concreteTypes.Iterate(func(C types.Type, ifaces interface{}) {
   331  			if types.Implements(C, I) {
   332  				ifaces, _ := ifaces.([]*types.Interface)
   333  				r.concreteTypes.Set(C, append(ifaces, I))
   334  				concs = append(concs, C)
   335  			}
   336  		})
   337  		r.interfaceTypes.Set(I, concs)
   338  	}
   339  	return concs
   340  }
   341  
   342  // addRuntimeType is called for each concrete type that can be the
   343  // dynamic type of some interface or reflect.Value.
   344  // Adapted from needMethods in go/ssa/builder.go
   345  func (r *rta) addRuntimeType(T types.Type, skip bool) {
   346  	if prev, ok := r.result.RuntimeTypes.At(T).(bool); ok {
   347  		if skip && !prev {
   348  			r.result.RuntimeTypes.Set(T, skip)
   349  		}
   350  		return
   351  	}
   352  	r.result.RuntimeTypes.Set(T, skip)
   353  
   354  	mset := r.prog.MethodSets.MethodSet(T)
   355  
   356  	if _, ok := T.Underlying().(*types.Interface); !ok {
   357  		// T is a new concrete type.
   358  		for i, n := 0, mset.Len(); i < n; i++ {
   359  			sel := mset.At(i)
   360  			m := sel.Obj()
   361  
   362  			if m.Exported() {
   363  				// Exported methods are always potentially callable via reflection.
   364  				r.addReachable(r.prog.MethodValue(sel), true)
   365  			}
   366  		}
   367  
   368  		// Add callgraph edge for each existing dynamic
   369  		// "invoke"-mode call via that interface.
   370  		for _, I := range r.interfaces(T) {
   371  			sites, _ := r.invokeSites.At(I).([]ssa.CallInstruction)
   372  			for _, site := range sites {
   373  				r.addInvokeEdge(site, T)
   374  			}
   375  		}
   376  	}
   377  
   378  	// Precondition: T is not a method signature (*Signature with Recv()!=nil).
   379  	// Recursive case: skip => don't call makeMethods(T).
   380  	// Each package maintains its own set of types it has visited.
   381  
   382  	var n *types.Named
   383  	switch T := T.(type) {
   384  	case *types.Named:
   385  		n = T
   386  	case *types.Pointer:
   387  		n, _ = T.Elem().(*types.Named)
   388  	}
   389  	if n != nil {
   390  		owner := n.Obj().Pkg()
   391  		if owner == nil {
   392  			return // built-in error type
   393  		}
   394  	}
   395  
   396  	// Recursion over signatures of each exported method.
   397  	for i := 0; i < mset.Len(); i++ {
   398  		if mset.At(i).Obj().Exported() {
   399  			sig := mset.At(i).Type().(*types.Signature)
   400  			r.addRuntimeType(sig.Params(), true)  // skip the Tuple itself
   401  			r.addRuntimeType(sig.Results(), true) // skip the Tuple itself
   402  		}
   403  	}
   404  
   405  	switch t := T.(type) {
   406  	case *types.Basic:
   407  		// nop
   408  
   409  	case *types.Interface:
   410  		// nop---handled by recursion over method set.
   411  
   412  	case *types.Pointer:
   413  		r.addRuntimeType(t.Elem(), false)
   414  
   415  	case *types.Slice:
   416  		r.addRuntimeType(t.Elem(), false)
   417  
   418  	case *types.Chan:
   419  		r.addRuntimeType(t.Elem(), false)
   420  
   421  	case *types.Map:
   422  		r.addRuntimeType(t.Key(), false)
   423  		r.addRuntimeType(t.Elem(), false)
   424  
   425  	case *types.Signature:
   426  		if t.Recv() != nil {
   427  			panic(fmt.Sprintf("Signature %s has Recv %s", t, t.Recv()))
   428  		}
   429  		r.addRuntimeType(t.Params(), true)  // skip the Tuple itself
   430  		r.addRuntimeType(t.Results(), true) // skip the Tuple itself
   431  
   432  	case *types.Named:
   433  		// A pointer-to-named type can be derived from a named
   434  		// type via reflection.  It may have methods too.
   435  		r.addRuntimeType(types.NewPointer(T), false)
   436  
   437  		// Consider 'type T struct{S}' where S has methods.
   438  		// Reflection provides no way to get from T to struct{S},
   439  		// only to S, so the method set of struct{S} is unwanted,
   440  		// so set 'skip' flag during recursion.
   441  		r.addRuntimeType(t.Underlying(), true)
   442  
   443  	case *types.Array:
   444  		r.addRuntimeType(t.Elem(), false)
   445  
   446  	case *types.Struct:
   447  		for i, n := 0, t.NumFields(); i < n; i++ {
   448  			r.addRuntimeType(t.Field(i).Type(), false)
   449  		}
   450  
   451  	case *types.Tuple:
   452  		for i, n := 0, t.Len(); i < n; i++ {
   453  			r.addRuntimeType(t.At(i).Type(), false)
   454  		}
   455  
   456  	default:
   457  		panic(T)
   458  	}
   459  }
   460  

View as plain text