// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Memory statistics

package runtime

import (
	"runtime/internal/atomic"
	"unsafe"
)

type mstats struct {
	// Statistics about malloc heap.
	heapStats consistentHeapStats

	// Statistics about stacks.
	stacks_sys sysMemStat // only counts newosproc0 stack in mstats; differs from MemStats.StackSys

	// Statistics about allocation of low-level fixed-size structures.
	mspan_sys    sysMemStat
	mcache_sys   sysMemStat
	buckhash_sys sysMemStat // profiling bucket hash table

	// Statistics about GC overhead.
	gcMiscSys sysMemStat // updated atomically or during STW

	// Miscellaneous statistics.
	other_sys sysMemStat // updated atomically or during STW

	// Statistics about the garbage collector.

	// Protected by mheap or stopping the world during GC.
	last_gc_unix    uint64 // last gc (in unix time)
	pause_total_ns  uint64
	pause_ns        [256]uint64 // circular buffer of recent gc pause lengths
	pause_end       [256]uint64 // circular buffer of recent gc end times (nanoseconds since 1970)
	numgc           uint32
	numforcedgc     uint32  // number of user-forced GCs
	gc_cpu_fraction float64 // fraction of CPU time used by GC

	last_gc_nanotime uint64 // last gc (monotonic time)
	lastHeapInUse    uint64 // heapInUse at mark termination of the previous GC

	enablegc bool

	_ uint32 // ensure gcPauseDist is aligned.

	// gcPauseDist represents the distribution of all GC-related
	// application pauses in the runtime.
	//
	// Each individual pause is counted separately, unlike pause_ns.
	gcPauseDist timeHistogram
}

var memstats mstats

// A MemStats records statistics about the memory allocator.
type MemStats struct {
	// General statistics.

	// Alloc is bytes of allocated heap objects.
	//
	// This is the same as HeapAlloc (see below).
	Alloc uint64

	// TotalAlloc is cumulative bytes allocated for heap objects.
	//
	// TotalAlloc increases as heap objects are allocated, but
	// unlike Alloc and HeapAlloc, it does not decrease when
	// objects are freed.
	TotalAlloc uint64

	// Sys is the total bytes of memory obtained from the OS.
	//
	// Sys is the sum of the XSys fields below. Sys measures the
	// virtual address space reserved by the Go runtime for the
	// heap, stacks, and other internal data structures. It's
	// likely that not all of the virtual address space is backed
	// by physical memory at any given moment, though in general
	// it all was at some point.
	Sys uint64

	// Lookups is the number of pointer lookups performed by the
	// runtime.
	//
	// This is primarily useful for debugging runtime internals.
	Lookups uint64

	// Mallocs is the cumulative count of heap objects allocated.
	// The number of live objects is Mallocs - Frees.
	Mallocs uint64

	// Frees is the cumulative count of heap objects freed.
	Frees uint64

	// Heap memory statistics.
	//
	// Interpreting the heap statistics requires some knowledge of
	// how Go organizes memory. Go divides the virtual address
	// space of the heap into "spans", which are contiguous
	// regions of memory 8K or larger. A span may be in one of
	// three states:
	//
	// An "idle" span contains no objects or other data. The
	// physical memory backing an idle span can be released back
	// to the OS (but the virtual address space never is), or it
	// can be converted into an "in use" or "stack" span.
	//
	// An "in use" span contains at least one heap object and may
	// have free space available to allocate more heap objects.
	//
	// A "stack" span is used for goroutine stacks. Stack spans
	// are not considered part of the heap. A span can change
	// between heap and stack memory; it is never used for both
	// simultaneously.

	// HeapAlloc is bytes of allocated heap objects.
	//
	// "Allocated" heap objects include all reachable objects, as
	// well as unreachable objects that the garbage collector has
	// not yet freed. Specifically, HeapAlloc increases as heap
	// objects are allocated and decreases as the heap is swept
	// and unreachable objects are freed. Sweeping occurs
	// incrementally between GC cycles, so these two processes
	// occur simultaneously, and as a result HeapAlloc tends to
	// change smoothly (in contrast with the sawtooth that is
	// typical of stop-the-world garbage collectors).
	HeapAlloc uint64

	// HeapSys is bytes of heap memory obtained from the OS.
	//
	// HeapSys measures the amount of virtual address space
	// reserved for the heap. This includes virtual address space
	// that has been reserved but not yet used, which consumes no
	// physical memory, but tends to be small, as well as virtual
	// address space for which the physical memory has been
	// returned to the OS after it became unused (see HeapReleased
	// for a measure of the latter).
	//
	// HeapSys estimates the largest size the heap has had.
	HeapSys uint64

	// HeapIdle is bytes in idle (unused) spans.
	//
	// Idle spans have no objects in them. These spans could be
	// (and may already have been) returned to the OS, or they can
	// be reused for heap allocations, or they can be reused as
	// stack memory.
	//
	// HeapIdle minus HeapReleased estimates the amount of memory
	// that could be returned to the OS, but is being retained by
	// the runtime so it can grow the heap without requesting more
	// memory from the OS. If this difference is significantly
	// larger than the heap size, it indicates there was a recent
	// transient spike in live heap size.
	HeapIdle uint64

	// HeapInuse is bytes in in-use spans.
	//
	// In-use spans have at least one object in them. These spans
	// can only be used for other objects of roughly the same
	// size.
	//
	// HeapInuse minus HeapAlloc estimates the amount of memory
	// that has been dedicated to particular size classes, but is
	// not currently being used. This is an upper bound on
	// fragmentation, but in general this memory can be reused
	// efficiently.
	HeapInuse uint64

	// HeapReleased is bytes of physical memory returned to the OS.
	//
	// This counts heap memory from idle spans that was returned
	// to the OS and has not yet been reacquired for the heap.
	HeapReleased uint64

	// HeapObjects is the number of allocated heap objects.
	//
	// Like HeapAlloc, this increases as objects are allocated and
	// decreases as the heap is swept and unreachable objects are
	// freed.
	HeapObjects uint64

	// Stack memory statistics.
	//
	// Stacks are not considered part of the heap, but the runtime
	// can reuse a span of heap memory for stack memory, and
	// vice-versa.

	// StackInuse is bytes in stack spans.
	//
	// In-use stack spans have at least one stack in them. These
	// spans can only be used for other stacks of the same size.
	//
	// There is no StackIdle because unused stack spans are
	// returned to the heap (and hence counted toward HeapIdle).
	StackInuse uint64

	// StackSys is bytes of stack memory obtained from the OS.
	//
	// StackSys is StackInuse, plus any memory obtained directly
	// from the OS for OS thread stacks (which should be minimal).
	StackSys uint64

	// Off-heap memory statistics.
	//
	// The following statistics measure runtime-internal
	// structures that are not allocated from heap memory (usually
	// because they are part of implementing the heap). Unlike
	// heap or stack memory, any memory allocated to these
	// structures is dedicated to these structures.
	//
	// These are primarily useful for debugging runtime memory
	// overheads.

	// MSpanInuse is bytes of allocated mspan structures.
	MSpanInuse uint64

	// MSpanSys is bytes of memory obtained from the OS for mspan
	// structures.
	MSpanSys uint64

	// MCacheInuse is bytes of allocated mcache structures.
	MCacheInuse uint64

	// MCacheSys is bytes of memory obtained from the OS for
	// mcache structures.
	MCacheSys uint64

	// BuckHashSys is bytes of memory in profiling bucket hash tables.
	BuckHashSys uint64

	// GCSys is bytes of memory in garbage collection metadata.
	GCSys uint64

	// OtherSys is bytes of memory in miscellaneous off-heap
	// runtime allocations.
	OtherSys uint64

	// Garbage collector statistics.

	// NextGC is the target heap size of the next GC cycle.
	//
	// The garbage collector's goal is to keep HeapAlloc ≤ NextGC.
	// At the end of each GC cycle, the target for the next cycle
	// is computed based on the amount of reachable data and the
	// value of GOGC.
	NextGC uint64

	// LastGC is the time the last garbage collection finished, as
	// nanoseconds since 1970 (the UNIX epoch).
	LastGC uint64

	// PauseTotalNs is the cumulative nanoseconds in GC
	// stop-the-world pauses since the program started.
	//
	// During a stop-the-world pause, all goroutines are paused
	// and only the garbage collector can run.
	PauseTotalNs uint64

	// PauseNs is a circular buffer of recent GC stop-the-world
	// pause times in nanoseconds.
	//
	// The most recent pause is at PauseNs[(NumGC+255)%256]. In
	// general, PauseNs[(N+255)%256] records the time paused in
	// the Nth GC cycle. There may be multiple pauses per GC
	// cycle; this is the sum of all pauses during a cycle.
	PauseNs [256]uint64

	// PauseEnd is a circular buffer of recent GC pause end times,
	// as nanoseconds since 1970 (the UNIX epoch).
	//
	// This buffer is filled the same way as PauseNs. There may be
	// multiple pauses per GC cycle; this records the end of the
	// last pause in a cycle.
	PauseEnd [256]uint64

	// NumGC is the number of completed GC cycles.
	NumGC uint32

	// NumForcedGC is the number of GC cycles that were forced by
	// the application calling the GC function.
	NumForcedGC uint32

	// GCCPUFraction is the fraction of this program's available
	// CPU time used by the GC since the program started.
	//
	// GCCPUFraction is expressed as a number between 0 and 1,
	// where 0 means GC has consumed none of this program's CPU. A
	// program's available CPU time is defined as the integral of
	// GOMAXPROCS since the program started. That is, if
	// GOMAXPROCS is 2 and a program has been running for 10
	// seconds, its "available CPU" is 20 seconds. GCCPUFraction
	// does not include CPU time used for write barrier activity.
	//
	// This is the same as the fraction of CPU reported by
	// GODEBUG=gctrace=1.
	GCCPUFraction float64

	// EnableGC indicates that GC is enabled. It is always true,
	// even if GOGC=off.
	EnableGC bool

	// DebugGC is currently unused.
	DebugGC bool

	// BySize reports per-size class allocation statistics.
	//
	// BySize[N] gives statistics for allocations of size S where
	// BySize[N-1].Size < S ≤ BySize[N].Size.
	//
	// This does not report allocations larger than BySize[60].Size.
	BySize [61]struct {
		// Size is the maximum byte size of an object in this
		// size class.
		Size uint32

		// Mallocs is the cumulative count of heap objects
		// allocated in this size class. The cumulative bytes
		// of allocation is Size*Mallocs. The number of live
		// objects in this size class is Mallocs - Frees.
		Mallocs uint64

		// Frees is the cumulative count of heap objects freed
		// in this size class.
		Frees uint64
	}
}

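// An illustrative sketch (not part of the runtime) of how client code
// might apply the PauseNs and BySize documentation above. Here m is
// assumed to be a MemStats filled in by ReadMemStats, and size is the
// hypothetical byte size of an allocation of interest:
//
//	// Walk the recorded pauses, most recent first.
//	n := m.NumGC
//	if n > 256 {
//		n = 256
//	}
//	for i := uint32(0); i < n; i++ {
//		pause := m.PauseNs[(m.NumGC+255-i)%256]
//		_ = pause
//	}
//
//	// Find the size class whose stats cover allocations of size bytes.
//	for _, c := range m.BySize {
//		if size <= uint64(c.Size) {
//			// c.Mallocs and c.Frees count objects of this class.
//			break
//		}
//	}
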
func init() {
	if offset := unsafe.Offsetof(memstats.heapStats); offset%8 != 0 {
		println(offset)
		throw("memstats.heapStats not aligned to 8 bytes")
	}
	if offset := unsafe.Offsetof(memstats.gcPauseDist); offset%8 != 0 {
		println(offset)
		throw("memstats.gcPauseDist not aligned to 8 bytes")
	}
	// Ensure the size of heapStatsDelta causes adjacent fields/slots (e.g.
	// [3]heapStatsDelta) to be 8-byte aligned.
	if size := unsafe.Sizeof(heapStatsDelta{}); size%8 != 0 {
		println(size)
		throw("heapStatsDelta not a multiple of 8 bytes in size")
	}
}

// ReadMemStats populates m with memory allocator statistics.
//
// The returned memory allocator statistics are up to date as of the
// call to ReadMemStats. This is in contrast with a heap profile,
// which is a snapshot as of the most recently completed garbage
// collection cycle.
func ReadMemStats(m *MemStats) {
	stopTheWorld("read mem stats")

	systemstack(func() {
		readmemstats_m(m)
	})

	startTheWorld()
}

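// A minimal usage sketch from client code (illustrative; outside the
// runtime these names are reached as runtime.MemStats and
// runtime.ReadMemStats):
//
//	var m MemStats
//	ReadMemStats(&m)
//	println(m.HeapAlloc, m.HeapSys, m.NumGC)
//
// Because ReadMemStats stops the world, calling it frequently can be
// expensive.
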
// readmemstats_m populates stats for internal runtime values.
//
// The world must be stopped.
func readmemstats_m(stats *MemStats) {
	assertWorldStopped()

	// Flush mcaches to mcentral before doing anything else.
	//
	// Flushing to the mcentral may in general cause stats to
	// change as mcentral data structures are manipulated.
	systemstack(flushallmcaches)

	// Calculate memory allocator stats.
	// During program execution we only count number of frees and amount of freed memory.
	// Current number of alive objects in the heap and amount of alive heap memory
	// are calculated by scanning all spans.
	// Total number of mallocs is calculated as number of frees plus number of alive objects.
	// Similarly, total amount of allocated memory is calculated as amount of freed memory
	// plus amount of alive heap memory.

	// Collect consistent stats, which are the source-of-truth in some cases.
	var consStats heapStatsDelta
	memstats.heapStats.unsafeRead(&consStats)

	// Collect large allocation stats.
	totalAlloc := consStats.largeAlloc
	nMalloc := consStats.largeAllocCount
	totalFree := consStats.largeFree
	nFree := consStats.largeFreeCount

	// Collect per-sizeclass stats.
	var bySize [_NumSizeClasses]struct {
		Size    uint32
		Mallocs uint64
		Frees   uint64
	}
	for i := range bySize {
		bySize[i].Size = uint32(class_to_size[i])

		// Malloc stats.
		a := consStats.smallAllocCount[i]
		totalAlloc += a * uint64(class_to_size[i])
		nMalloc += a
		bySize[i].Mallocs = a

		// Free stats.
		f := consStats.smallFreeCount[i]
		totalFree += f * uint64(class_to_size[i])
		nFree += f
		bySize[i].Frees = f
	}

	// Account for tiny allocations.
	// For historical reasons, MemStats includes tiny allocations
	// in both the total free and total alloc count. This double-counts
	// memory in some sense because their tiny allocation block is also
	// counted. Tracking the lifetime of individual tiny allocations is
	// currently not done because it would be too expensive.
	nFree += consStats.tinyAllocCount
	nMalloc += consStats.tinyAllocCount

	// Calculate derived stats.

	stackInUse := uint64(consStats.inStacks)
	gcWorkBufInUse := uint64(consStats.inWorkBufs)
	gcProgPtrScalarBitsInUse := uint64(consStats.inPtrScalarBits)

	totalMapped := gcController.heapInUse.load() + gcController.heapFree.load() + gcController.heapReleased.load() +
		memstats.stacks_sys.load() + memstats.mspan_sys.load() + memstats.mcache_sys.load() +
		memstats.buckhash_sys.load() + memstats.gcMiscSys.load() + memstats.other_sys.load() +
		stackInUse + gcWorkBufInUse + gcProgPtrScalarBitsInUse

	heapGoal := gcController.heapGoal()

	// The world is stopped, so the consistent stats (after aggregation)
	// should be identical to some combination of memstats. In particular:
	//
	// * memstats.heapInUse == inHeap
	// * memstats.heapReleased == released
	// * memstats.heapInUse + memstats.heapFree == committed - inStacks - inWorkBufs - inPtrScalarBits
	// * memstats.totalAlloc == totalAlloc
	// * memstats.totalFree == totalFree
	//
	// Check if that's actually true.
	//
	// TODO(mknyszek): Maybe don't throw here. It would be bad if a
	// bug in otherwise benign accounting caused the whole application
	// to crash.
	if gcController.heapInUse.load() != uint64(consStats.inHeap) {
		print("runtime: heapInUse=", gcController.heapInUse.load(), "\n")
		print("runtime: consistent value=", consStats.inHeap, "\n")
		throw("heapInUse and consistent stats are not equal")
	}
	if gcController.heapReleased.load() != uint64(consStats.released) {
		print("runtime: heapReleased=", gcController.heapReleased.load(), "\n")
		print("runtime: consistent value=", consStats.released, "\n")
		throw("heapReleased and consistent stats are not equal")
	}
	heapRetained := gcController.heapInUse.load() + gcController.heapFree.load()
	consRetained := uint64(consStats.committed - consStats.inStacks - consStats.inWorkBufs - consStats.inPtrScalarBits)
	if heapRetained != consRetained {
		print("runtime: global value=", heapRetained, "\n")
		print("runtime: consistent value=", consRetained, "\n")
		throw("measures of the retained heap are not equal")
	}
	if gcController.totalAlloc.Load() != totalAlloc {
		print("runtime: totalAlloc=", gcController.totalAlloc.Load(), "\n")
		print("runtime: consistent value=", totalAlloc, "\n")
		throw("totalAlloc and consistent stats are not equal")
	}
	if gcController.totalFree.Load() != totalFree {
		print("runtime: totalFree=", gcController.totalFree.Load(), "\n")
		print("runtime: consistent value=", totalFree, "\n")
		throw("totalFree and consistent stats are not equal")
	}
	// Also check that mappedReady lines up with totalMapped - released.
	// This isn't really the same type of "make sure consistent stats line up" situation,
	// but this is an opportune time to check.
	if gcController.mappedReady.Load() != totalMapped-uint64(consStats.released) {
		print("runtime: mappedReady=", gcController.mappedReady.Load(), "\n")
		print("runtime: totalMapped=", totalMapped, "\n")
		print("runtime: released=", uint64(consStats.released), "\n")
		print("runtime: totalMapped-released=", totalMapped-uint64(consStats.released), "\n")
		throw("mappedReady and other memstats are not equal")
	}

	// We've calculated all the values we need. Now, populate stats.

	stats.Alloc = totalAlloc - totalFree
	stats.TotalAlloc = totalAlloc
	stats.Sys = totalMapped
	stats.Mallocs = nMalloc
	stats.Frees = nFree
	stats.HeapAlloc = totalAlloc - totalFree
	stats.HeapSys = gcController.heapInUse.load() + gcController.heapFree.load() + gcController.heapReleased.load()
	// By definition, HeapIdle is memory that was mapped
	// for the heap but is not currently used to hold heap
	// objects. It also specifically is memory that can be
	// used for other purposes, like stacks, but this memory
	// is subtracted out of HeapSys before it makes that
	// transition. Put another way:
	//
	// HeapSys = bytes allocated from the OS for the heap - bytes ultimately used for non-heap purposes
	// HeapIdle = bytes allocated from the OS for the heap - bytes ultimately used for any purpose
	//
	// or
	//
	// HeapSys = sys - stacks_inuse - gcWorkBufInUse - gcProgPtrScalarBitsInUse
	// HeapIdle = sys - stacks_inuse - gcWorkBufInUse - gcProgPtrScalarBitsInUse - heapInUse
	//
	// => HeapIdle = HeapSys - heapInUse = heapFree + heapReleased
	stats.HeapIdle = gcController.heapFree.load() + gcController.heapReleased.load()
	stats.HeapInuse = gcController.heapInUse.load()
	stats.HeapReleased = gcController.heapReleased.load()
	stats.HeapObjects = nMalloc - nFree
	stats.StackInuse = stackInUse
	// memstats.stacks_sys is only memory mapped directly for OS stacks.
	// Add in heap-allocated stack memory for user consumption.
	stats.StackSys = stackInUse + memstats.stacks_sys.load()
	stats.MSpanInuse = uint64(mheap_.spanalloc.inuse)
	stats.MSpanSys = memstats.mspan_sys.load()
	stats.MCacheInuse = uint64(mheap_.cachealloc.inuse)
	stats.MCacheSys = memstats.mcache_sys.load()
	stats.BuckHashSys = memstats.buckhash_sys.load()
	// MemStats defines GCSys as an aggregate of all memory related
	// to the memory management system, but we track this memory
	// at a more granular level in the runtime.
	stats.GCSys = memstats.gcMiscSys.load() + gcWorkBufInUse + gcProgPtrScalarBitsInUse
	stats.OtherSys = memstats.other_sys.load()
	stats.NextGC = heapGoal
	stats.LastGC = memstats.last_gc_unix
	stats.PauseTotalNs = memstats.pause_total_ns
	stats.PauseNs = memstats.pause_ns
	stats.PauseEnd = memstats.pause_end
	stats.NumGC = memstats.numgc
	stats.NumForcedGC = memstats.numforcedgc
	stats.GCCPUFraction = memstats.gc_cpu_fraction
	stats.EnableGC = true

	// stats.BySize and bySize might not match in length.
	// That's OK, stats.BySize cannot change due to backwards
	// compatibility issues. copy will copy the minimum number
	// of values between the two of them.
	copy(stats.BySize[:], bySize[:])
}

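// As documented on MemStats, Sys is the sum of the XSys fields. The
// assignments above preserve that identity exactly; a sketch of the
// arithmetic (commentary, not executable code):
//
//	HeapSys  = heapInUse + heapFree + heapReleased
//	StackSys = stackInUse + stacks_sys
//	GCSys    = gcMiscSys + gcWorkBufInUse + gcProgPtrScalarBitsInUse
//
//	Sys = HeapSys + StackSys + MSpanSys + MCacheSys
//	    + BuckHashSys + GCSys + OtherSys
//	    = totalMapped
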
//go:linkname readGCStats runtime/debug.readGCStats
func readGCStats(pauses *[]uint64) {
	systemstack(func() {
		readGCStats_m(pauses)
	})
}

// readGCStats_m must be called on the system stack because it acquires the heap
// lock. See mheap for details.
//
//go:systemstack
func readGCStats_m(pauses *[]uint64) {
	p := *pauses
	// Calling code in runtime/debug should make the slice large enough.
	if cap(p) < len(memstats.pause_ns)+3 {
		throw("short slice passed to readGCStats")
	}

	// Pass back: pauses, pause ends, last gc (absolute time), number of gc, total pause ns.
	lock(&mheap_.lock)

	n := memstats.numgc
	if n > uint32(len(memstats.pause_ns)) {
		n = uint32(len(memstats.pause_ns))
	}

	// The pause buffer is circular. The most recent pause is at
	// pause_ns[(numgc-1)%len(pause_ns)], and then backward
	// from there to go back farther in time. We deliver the times
	// most recent first (in p[0]).
	p = p[:cap(p)]
	for i := uint32(0); i < n; i++ {
		j := (memstats.numgc - 1 - i) % uint32(len(memstats.pause_ns))
		p[i] = memstats.pause_ns[j]
		p[n+i] = memstats.pause_end[j]
	}

	p[n+n] = memstats.last_gc_unix
	p[n+n+1] = uint64(memstats.numgc)
	p[n+n+2] = memstats.pause_total_ns
	unlock(&mheap_.lock)
	*pauses = p[:n+n+3]
}

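// The slice handed back through *pauses has the following layout,
// where n is the number of recorded cycles (at most len(pause_ns));
// the caller in runtime/debug is expected to unpack it accordingly:
//
//	p[0:n]    pause durations, most recent first
//	p[n:2n]   matching pause end times, most recent first
//	p[2n]     last GC end time (nanoseconds since 1970)
//	p[2n+1]   numgc
//	p[2n+2]   pause_total_ns
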
// flushmcache flushes the mcache of allp[i].
//
// The world must be stopped.
//
//go:nowritebarrier
func flushmcache(i int) {
	assertWorldStopped()

	p := allp[i]
	c := p.mcache
	if c == nil {
		return
	}
	c.releaseAll()
	stackcache_clear(c)
}

// flushallmcaches flushes the mcaches of all Ps.
//
// The world must be stopped.
//
//go:nowritebarrier
func flushallmcaches() {
	assertWorldStopped()

	for i := 0; i < int(gomaxprocs); i++ {
		flushmcache(i)
	}
}

// sysMemStat represents a global system statistic that is managed atomically.
//
// This type must structurally be a uint64 so that mstats aligns with MemStats.
type sysMemStat uint64

// load atomically reads the value of the stat.
//
// Must be nosplit as it is called in runtime initialization, e.g. newosproc0.
//
//go:nosplit
func (s *sysMemStat) load() uint64 {
	return atomic.Load64((*uint64)(s))
}

// add atomically adds n to the stat.
//
// Must be nosplit as it is called in runtime initialization, e.g. newosproc0.
//
//go:nosplit
func (s *sysMemStat) add(n int64) {
	val := atomic.Xadd64((*uint64)(s), n)
	if (n > 0 && int64(val) < n) || (n < 0 && int64(val)+n < n) {
		print("runtime: val=", val, " n=", n, "\n")
		throw("sysMemStat overflow")
	}
}

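// An illustrative sketch of how sysMemStat is used elsewhere in the
// runtime (the stat and byte count here are hypothetical):
//
//	memstats.mspan_sys.add(int64(nbytes))  // memory obtained from the OS
//	...
//	memstats.mspan_sys.add(-int64(nbytes)) // memory returned to the OS
//
// n is signed so the same method handles both directions; for n < 0
// the check in add also catches a stat underflowing past zero.
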
// heapStatsDelta contains deltas of various runtime memory statistics
// that need to be updated together in order for them to be kept
// consistent with one another.
type heapStatsDelta struct {
	// Memory stats.
	committed       int64 // byte delta of memory committed
	released        int64 // byte delta of released memory generated
	inHeap          int64 // byte delta of memory placed in the heap
	inStacks        int64 // byte delta of memory reserved for stacks
	inWorkBufs      int64 // byte delta of memory reserved for work bufs
	inPtrScalarBits int64 // byte delta of memory reserved for unrolled GC prog bits

	// Allocator stats.
	//
	// These are all uint64 because they're cumulative, and could quickly wrap
	// around otherwise.
	tinyAllocCount  uint64                  // number of tiny allocations
	largeAlloc      uint64                  // bytes allocated for large objects
	largeAllocCount uint64                  // number of large object allocations
	smallAllocCount [_NumSizeClasses]uint64 // number of allocs for small objects
	largeFree       uint64                  // bytes freed for large objects (>maxSmallSize)
	largeFreeCount  uint64                  // number of frees for large objects (>maxSmallSize)
	smallFreeCount  [_NumSizeClasses]uint64 // number of frees for small objects (<=maxSmallSize)

	// NOTE: This struct must be a multiple of 8 bytes in size because it
	// is stored in an array. If it's not, atomic accesses to the above
	// fields may be unaligned and fail on 32-bit platforms.
}

// merge adds in the deltas from b into a.
func (a *heapStatsDelta) merge(b *heapStatsDelta) {
	a.committed += b.committed
	a.released += b.released
	a.inHeap += b.inHeap
	a.inStacks += b.inStacks
	a.inWorkBufs += b.inWorkBufs
	a.inPtrScalarBits += b.inPtrScalarBits

	a.tinyAllocCount += b.tinyAllocCount
	a.largeAlloc += b.largeAlloc
	a.largeAllocCount += b.largeAllocCount
	for i := range b.smallAllocCount {
		a.smallAllocCount[i] += b.smallAllocCount[i]
	}
	a.largeFree += b.largeFree
	a.largeFreeCount += b.largeFreeCount
	for i := range b.smallFreeCount {
		a.smallFreeCount[i] += b.smallFreeCount[i]
	}
}

// consistentHeapStats represents a set of various memory statistics
// whose updates must be viewed completely to get a consistent
// state of the world.
//
// To write updates to memory stats use the acquire and release
// methods. To obtain a consistent global snapshot of these statistics,
// use read.
type consistentHeapStats struct {
	// stats is a ring buffer of heapStatsDelta values.
	// Writers always atomically update the delta at index gen.
	//
	// Readers operate by rotating gen (0 -> 1 -> 2 -> 0 -> ...)
	// and synchronizing with writers by observing each P's
	// statsSeq field. If the reader observes a P not writing,
	// it can be sure that it will pick up the new gen value the
	// next time it writes.
	//
	// The reader then takes responsibility by clearing space
	// in the ring buffer for the next reader to rotate gen to
	// that space (i.e. it merges in values from index (gen-2) mod 3
	// to index (gen-1) mod 3, then clears the former).
	//
	// Note that this means only one reader can be reading at a time.
	// There is no way for readers to synchronize.
	//
	// This process is why we need a ring buffer of size 3 instead
	// of 2: one is for the writers, one contains the most recent
	// data, and the last one is clear so writers can begin writing
	// to it the moment gen is updated.
	stats [3]heapStatsDelta

	// gen represents the current index into which writers
	// are writing, and can take on the value of 0, 1, or 2.
	// This value is updated atomically.
	gen uint32

	// noPLock is intended to provide mutual exclusion for updating
	// stats when no P is available. It does not block other writers
	// with a P, only other writers without a P and the reader. Because
	// stats are usually updated when a P is available, contention on
	// this lock should be minimal.
	noPLock mutex
}

// acquire returns a heapStatsDelta to be updated. In effect,
// it acquires the shard for writing. release must be called
// as soon as the relevant deltas are updated.
//
// The returned heapStatsDelta must be updated atomically.
//
// The caller's P must not change between acquire and
// release. This also means that the caller should not
// acquire a P or release its P in between. A P also must
// not acquire a given consistentHeapStats if it hasn't
// yet released it.
//
// nosplit because a stack growth in this function could
// lead to a stack allocation that could reenter the
// function.
//
//go:nosplit
func (m *consistentHeapStats) acquire() *heapStatsDelta {
	if pp := getg().m.p.ptr(); pp != nil {
		seq := atomic.Xadd(&pp.statsSeq, 1)
		if seq%2 == 0 {
			// Should have been incremented to odd.
			print("runtime: seq=", seq, "\n")
			throw("bad sequence number")
		}
	} else {
		lock(&m.noPLock)
	}
	gen := atomic.Load(&m.gen) % 3
	return &m.stats[gen]
}

// release indicates that the writer is done modifying
// the delta. The value returned by the corresponding
// acquire must no longer be accessed or modified after
// release is called.
//
// The caller's P must not change between acquire and
// release. This also means that the caller should not
// acquire a P or release its P in between.
//
// nosplit because a stack growth in this function could
// lead to a stack allocation that causes another acquire
// before this operation has completed.
//
//go:nosplit
func (m *consistentHeapStats) release() {
	if pp := getg().m.p.ptr(); pp != nil {
		seq := atomic.Xadd(&pp.statsSeq, 1)
		if seq%2 != 0 {
			// Should have been incremented to even.
			print("runtime: seq=", seq, "\n")
			throw("bad sequence number")
		}
	} else {
		unlock(&m.noPLock)
	}
}

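// A sketch of the writer protocol as used by allocation paths in the
// runtime (illustrative; which field gets updated is arbitrary here):
//
//	d := memstats.heapStats.acquire()
//	atomic.Xadd64(&d.tinyAllocCount, 1)
//	memstats.heapStats.release()
//
// The update between acquire and release must itself be atomic
// because every P writes to the same delta for the current
// generation.
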
// unsafeRead aggregates the delta for this shard into out.
//
// Unsafe because it does so without any synchronization. The
// world must be stopped.
func (m *consistentHeapStats) unsafeRead(out *heapStatsDelta) {
	assertWorldStopped()

	for i := range m.stats {
		out.merge(&m.stats[i])
	}
}

// unsafeClear clears the shard.
//
// Unsafe because the world must be stopped and values should
// be donated elsewhere before clearing.
func (m *consistentHeapStats) unsafeClear() {
	assertWorldStopped()

	for i := range m.stats {
		m.stats[i] = heapStatsDelta{}
	}
}

// read takes a globally consistent snapshot of m
// and puts the aggregated value in out. Even though out is a
// heapStatsDelta, the resulting values should be complete and
// valid statistic values.
//
// Not safe to call concurrently. The world must be stopped
// or metricsSema must be held.
func (m *consistentHeapStats) read(out *heapStatsDelta) {
	// Getting preempted after this point is not safe because
	// we read allp. We need to make sure a STW can't happen
	// so it doesn't change out from under us.
	mp := acquirem()

	// Get the current generation. We can be confident that this
	// will not change since read is serialized and is the only
	// one that modifies currGen.
	currGen := atomic.Load(&m.gen)
	prevGen := currGen - 1
	if currGen == 0 {
		prevGen = 2
	}

	// Prevent writers without a P from writing while we update gen.
	lock(&m.noPLock)

	// Rotate gen, effectively taking a snapshot of the state of
	// these statistics at the point of the exchange by moving
	// writers to the next set of deltas.
	//
	// This exchange is safe to do because we won't race
	// with anyone else trying to update this value.
	atomic.Xchg(&m.gen, (currGen+1)%3)

	// Allow P-less writers to continue. They'll be writing to the
	// next generation now.
	unlock(&m.noPLock)

	for _, p := range allp {
		// Spin until there are no more writers.
		for atomic.Load(&p.statsSeq)%2 != 0 {
		}
	}

	// At this point we've observed that each sequence
	// number is even, so any future writers will observe
	// the new gen value. That means it's safe to read from
	// the other deltas in the stats buffer.

	// Perform our responsibilities and free up
	// stats[prevGen] for the next time we want to take
	// a snapshot.
	m.stats[currGen].merge(&m.stats[prevGen])
	m.stats[prevGen] = heapStatsDelta{}

	// Finally, copy out the complete delta.
	*out = m.stats[currGen]

	releasem(mp)
}

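// A worked example of one read rotation (commentary, not runtime
// code). Suppose gen == 1 on entry: writers are updating stats[1],
// stats[0] holds the previous snapshot, and stats[2] is clear. Then:
//
//	1. read sets gen to 2; writers begin updating stats[2].
//	2. read spins until every P's statsSeq is even, so no writer is
//	   still mid-update on stats[1].
//	3. stats[1] += stats[0] (merge), and stats[0] is cleared, leaving
//	   it ready for the next rotation.
//	4. stats[1] now holds cumulative totals and is copied to *out.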