// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// CPU profiling.
//
// The signal handler for the profiling clock tick adds a new stack trace
// to a log of recent traces. The log is read by a user goroutine that
// turns it into formatted profile data. If the reader does not keep up
// with the log, those writes will be recorded as a count of lost records.
// The actual profile buffer is in profbuf.go.

package runtime

import (
	"internal/abi"
	"runtime/internal/atomic"
	"runtime/internal/sys"
	"unsafe"
)

const (
	// maxCPUProfStack is the maximum stack depth recorded per sample.
	maxCPUProfStack = 64

	// profBufWordCount is the size of the CPU profile buffer's storage for the
	// header and stack of each sample, measured in 64-bit words. Every sample
	// has a required header of two words. With a small additional header (a
	// word or two) and stacks at the profiler's maximum length of 64 frames,
	// that capacity can support 1900 samples or 19 thread-seconds at a 100 Hz
	// sample rate, at a cost of 1 MiB.
	profBufWordCount = 1 << 17
	// profBufTagCount is the size of the CPU profile buffer's storage for the
	// goroutine tags associated with each sample. A capacity of 1<<14 means
	// room for 16k samples, or 160 thread-seconds at a 100 Hz sample rate.
	profBufTagCount = 1 << 14
)

// cpuProfile holds the state for CPU profiling. There is a single
// package-level instance, cpuprof. The lock guards on/log against
// concurrent SetCPUProfileRate and readProfile calls; the signal-time
// writers (add, addNonGo) instead coordinate through prof.signalLock
// because they cannot take a mutex from a signal handler.
type cpuProfile struct {
	lock mutex
	on   bool     // profiling is on
	log  *profBuf // profile events written here

	// extra holds extra stacks accumulated in addNonGo
	// corresponding to profiling signals arriving on
	// non-Go-created threads. Those stacks are written
	// to log the next time a normal Go thread gets the
	// signal handler.
	// Assuming the stacks are 2 words each (we don't get
	// a full traceback from those threads), plus one word
	// size for framing, 100 Hz profiling would generate
	// 300 words per second.
	// Hopefully a normal Go thread will get the profiling
	// signal at least once every few seconds.
	extra      [1000]uintptr
	numExtra   int
	lostExtra  uint64 // count of frames lost because extra is full
	lostAtomic uint64 // count of frames lost because of being in atomic64 on mips/arm; updated racily
}

// cpuprof is the single, package-level CPU profile state.
var cpuprof cpuProfile

// SetCPUProfileRate sets the CPU profiling rate to hz samples per second.
// If hz <= 0, SetCPUProfileRate turns off profiling.
// If the profiler is on, the rate cannot be changed without first turning it off.
//
// Most clients should use the runtime/pprof package or
// the testing package's -test.cpuprofile flag instead of calling
// SetCPUProfileRate directly.
func SetCPUProfileRate(hz int) {
	// Clamp hz to something reasonable.
	if hz < 0 {
		hz = 0
	}
	if hz > 1000000 {
		hz = 1000000
	}

	lock(&cpuprof.lock)
	if hz > 0 {
		// A non-nil log with on == false means a previous profile is
		// still being drained by readProfile; refuse to start a new one.
		if cpuprof.on || cpuprof.log != nil {
			print("runtime: cannot set cpu profile rate until previous profile has finished.\n")
			unlock(&cpuprof.lock)
			return
		}

		cpuprof.on = true
		cpuprof.log = newProfBuf(1, profBufWordCount, profBufTagCount)
		// The first record in the log carries the sampling rate so the
		// reader can reconstruct the profile period.
		hdr := [1]uint64{uint64(hz)}
		cpuprof.log.write(nil, nanotime(), hdr[:], nil)
		setcpuprofilerate(int32(hz))
	} else if cpuprof.on {
		// Stop the timer first so no new signals arrive, then flush
		// any queued non-Go stacks before closing the log.
		setcpuprofilerate(0)
		cpuprof.on = false
		cpuprof.addExtra()
		cpuprof.log.close()
	}
	unlock(&cpuprof.lock)
}

// add adds the stack trace to the profile.
// It is called from signal handlers and other limited environments
// and cannot allocate memory or acquire locks that might be
// held at the time of the signal, nor can it use substantial amounts
// of stack.
//
//go:nowritebarrierrec
func (p *cpuProfile) add(tagPtr *unsafe.Pointer, stk []uintptr) {
	// Simple cas-lock to coordinate with setcpuprofilerate.
	for !atomic.Cas(&prof.signalLock, 0, 1) {
		// TODO: Is it safe to osyield here? https://go.dev/issue/52672
		osyield()
	}

	if prof.hz != 0 { // implies cpuprof.log != nil
		// Drain any stacks queued by addNonGo, and report lost samples,
		// before writing this sample.
		if p.numExtra > 0 || p.lostExtra > 0 || p.lostAtomic > 0 {
			p.addExtra()
		}
		hdr := [1]uint64{1}
		// Note: write "knows" that the argument is &gp.labels,
		// because otherwise its write barrier behavior may not
		// be correct. See the long comment there before
		// changing the argument here.
		cpuprof.log.write(tagPtr, nanotime(), hdr[:], stk)
	}

	atomic.Store(&prof.signalLock, 0)
}

// addNonGo adds the non-Go stack trace to the profile.
// It is called from a non-Go thread, so we cannot use much stack at all,
// nor do anything that needs a g or an m.
// In particular, we can't call cpuprof.log.write.
// Instead, we copy the stack into cpuprof.extra,
// which will be drained the next time a Go thread
// gets the signal handling event.
//
//go:nosplit
//go:nowritebarrierrec
func (p *cpuProfile) addNonGo(stk []uintptr) {
	// Simple cas-lock to coordinate with SetCPUProfileRate.
	// (Other calls to add or addNonGo should be blocked out
	// by the fact that only one SIGPROF can be handled by the
	// process at a time. If not, this lock will serialize those too.
	// The use of timer_create(2) on Linux to request process-targeted
	// signals may have changed this.)
	for !atomic.Cas(&prof.signalLock, 0, 1) {
		// TODO: Is it safe to osyield here? https://go.dev/issue/52672
		osyield()
	}

	if cpuprof.numExtra+1+len(stk) < len(cpuprof.extra) {
		// Framing: one length word (1+len(stk)) followed by the stack
		// itself, appended after the entries already queued.
		i := cpuprof.numExtra
		cpuprof.extra[i] = uintptr(1 + len(stk))
		copy(cpuprof.extra[i+1:], stk)
		cpuprof.numExtra += 1 + len(stk)
	} else {
		cpuprof.lostExtra++
	}

	atomic.Store(&prof.signalLock, 0)
}

// addExtra adds the "extra" profiling events,
// queued by addNonGo, to the profile log.
// addExtra is called either from a signal handler on a Go thread
// or from an ordinary goroutine; either way it can use stack
// and has a g. The world may be stopped, though.
func (p *cpuProfile) addExtra() {
	// Copy accumulated non-Go profile events.
	hdr := [1]uint64{1}
	for i := 0; i < p.numExtra; {
		// Each record is a length word followed by that many-1 PCs;
		// see the framing written by addNonGo.
		p.log.write(nil, 0, hdr[:], p.extra[i+1:i+int(p.extra[i])])
		i += int(p.extra[i])
	}
	p.numExtra = 0

	// Report any lost events as a synthetic sample whose header carries
	// the lost count and whose stack names the _LostExternalCode marker.
	if p.lostExtra > 0 {
		hdr := [1]uint64{p.lostExtra}
		lostStk := [2]uintptr{
			abi.FuncPCABIInternal(_LostExternalCode) + sys.PCQuantum,
			abi.FuncPCABIInternal(_ExternalCode) + sys.PCQuantum,
		}
		p.log.write(nil, 0, hdr[:], lostStk[:])
		p.lostExtra = 0
	}

	if p.lostAtomic > 0 {
		hdr := [1]uint64{p.lostAtomic}
		lostStk := [2]uintptr{
			abi.FuncPCABIInternal(_LostSIGPROFDuringAtomic64) + sys.PCQuantum,
			abi.FuncPCABIInternal(_System) + sys.PCQuantum,
		}
		p.log.write(nil, 0, hdr[:], lostStk[:])
		p.lostAtomic = 0
	}

}

// CPUProfile panics.
// It formerly provided raw access to chunks of
// a pprof-format profile generated by the runtime.
// The details of generating that format have changed,
// so this functionality has been removed.
//
// Deprecated: Use the runtime/pprof package,
// or the handlers in the net/http/pprof package,
// or the testing package's -test.cpuprofile flag instead.
func CPUProfile() []byte {
	panic("CPUProfile no longer available")
}

//go:linkname runtime_pprof_runtime_cyclesPerSecond runtime/pprof.runtime_cyclesPerSecond
func runtime_pprof_runtime_cyclesPerSecond() int64 {
	return tickspersecond()
}

// readProfile, provided to runtime/pprof, returns the next chunk of
// binary CPU profiling stack trace data, blocking until data is available.
// If profiling is turned off and all the profile data accumulated while it was
// on has been returned, readProfile returns eof=true.
// The caller must save the returned data and tags before calling readProfile again.
// The returned data contains a whole number of records, and tags contains
// exactly one entry per record.
//
//go:linkname runtime_pprof_readProfile runtime/pprof.readProfile
func runtime_pprof_readProfile() ([]uint64, []unsafe.Pointer, bool) {
	lock(&cpuprof.lock)
	log := cpuprof.log
	unlock(&cpuprof.lock)
	// Read outside the lock: read blocks until data is available and
	// must not hold cpuprof.lock while waiting.
	data, tags, eof := log.read(profBufBlocking)
	if len(data) == 0 && eof {
		// The profile is fully drained; clearing log allows
		// SetCPUProfileRate to start a new profile.
		lock(&cpuprof.lock)
		cpuprof.log = nil
		unlock(&cpuprof.lock)
	}
	return data, tags, eof
}