Source file
src/runtime/os_linux.go
Documentation: runtime
1
2
3
4
5 package runtime
6
7 import (
8 "internal/abi"
9 "internal/goarch"
10 "runtime/internal/atomic"
11 "runtime/internal/syscall"
12 "unsafe"
13 )
14
15
16
17
// sigPerThreadSyscall is the signal number (_SIGRTMIN+1) that
// syscall_runtime_doAllThreadsSyscall delivers to every other thread, via
// signalM, after setting that thread's needPerThreadSyscall flag.
const sigPerThreadSyscall = _SIGRTMIN + 1
19
// mOS holds the Linux-specific per-thread state. In this file its fields are
// reached through *m values (for example mp.profileTimer).
type mOS struct {
	// profileTimer is the id of the timer that setThreadCPUProfiler created
	// with timer_create. It is only meaningful while profileTimerValid is
	// non-zero.
	profileTimer int32

	// profileTimerValid is non-zero while profileTimer refers to a live
	// timer. It is read and written with atomic.Load / atomic.Store.
	profileTimerValid uint32

	// needPerThreadSyscall is set to 1 by syscall_runtime_doAllThreadsSyscall
	// to ask this thread to run the pending perThreadSyscall, and reset to 0
	// by runPerThreadSyscall once the call has been made.
	needPerThreadSyscall atomic.Uint8
}
35
36
// futex is declared without a body; its implementation is provided outside
// this file. Callers here pass the futex word address, an operation
// (_FUTEX_WAIT_PRIVATE or _FUTEX_WAKE_PRIVATE), a value, an optional timeout
// pointer, and nil/0 for addr2 and val3. futexwakeup treats a negative result
// as failure.
func futex(addr unsafe.Pointer, op int32, val uint32, ts, addr2 unsafe.Pointer, val3 uint32) int32
38
39
40
41
42
43
44
45
46
47
// Futex operation codes passed as the op argument of futex. Both operations
// used by this file carry the private flag (see futex(2)).
const (
	_FUTEX_PRIVATE_FLAG = 1 << 7                  // 128
	_FUTEX_WAIT_PRIVATE = _FUTEX_PRIVATE_FLAG | 0 // wait operation (0) with the private flag
	_FUTEX_WAKE_PRIVATE = _FUTEX_PRIVATE_FLAG | 1 // wake operation (1) with the private flag
)
53
54
55
56
57
58
59
60
61
62 func futexsleep(addr *uint32, val uint32, ns int64) {
63
64
65
66
67
68 if ns < 0 {
69 futex(unsafe.Pointer(addr), _FUTEX_WAIT_PRIVATE, val, nil, nil, 0)
70 return
71 }
72
73 var ts timespec
74 ts.setNsec(ns)
75 futex(unsafe.Pointer(addr), _FUTEX_WAIT_PRIVATE, val, unsafe.Pointer(&ts), nil, 0)
76 }
77
78
79
80
81 func futexwakeup(addr *uint32, cnt uint32) {
82 ret := futex(unsafe.Pointer(addr), _FUTEX_WAKE_PRIVATE, cnt, nil, nil, 0)
83 if ret >= 0 {
84 return
85 }
86
87
88
89
90 systemstack(func() {
91 print("futexwakeup addr=", addr, " returned ", ret, "\n")
92 })
93
94 *(*int32)(unsafe.Pointer(uintptr(0x1006))) = 0x1006
95 }
96
97 func getproccount() int32 {
98
99
100
101
102
103
104
105 const maxCPUs = 64 * 1024
106 var buf [maxCPUs / 8]byte
107 r := sched_getaffinity(0, unsafe.Sizeof(buf), &buf[0])
108 if r < 0 {
109 return 1
110 }
111 n := int32(0)
112 for _, v := range buf[:r] {
113 for v != 0 {
114 n += int32(v & 1)
115 v >>= 1
116 }
117 }
118 if n == 0 {
119 n = 1
120 }
121 return n
122 }
123
124
// Flag bits for the clone system call (see clone(2)). Each value is written
// as a single-bit shift; the equivalent hexadecimal value is given alongside.
const (
	_CLONE_VM             = 1 << 8  // 0x100
	_CLONE_FS             = 1 << 9  // 0x200
	_CLONE_FILES          = 1 << 10 // 0x400
	_CLONE_SIGHAND        = 1 << 11 // 0x800
	_CLONE_PTRACE         = 1 << 13 // 0x2000
	_CLONE_VFORK          = 1 << 14 // 0x4000
	_CLONE_PARENT         = 1 << 15 // 0x8000
	_CLONE_THREAD         = 1 << 16 // 0x10000
	_CLONE_NEWNS          = 1 << 17 // 0x20000
	_CLONE_SYSVSEM        = 1 << 18 // 0x40000
	_CLONE_SETTLS         = 1 << 19 // 0x80000
	_CLONE_PARENT_SETTID  = 1 << 20 // 0x100000
	_CLONE_CHILD_CLEARTID = 1 << 21 // 0x200000
	_CLONE_UNTRACED       = 1 << 23 // 0x800000
	_CLONE_CHILD_SETTID   = 1 << 24 // 0x1000000
	_CLONE_STOPPED        = 1 << 25 // 0x2000000
	_CLONE_NEWUTS         = 1 << 26 // 0x4000000
	_CLONE_NEWIPC         = 1 << 27 // 0x8000000

	// cloneFlags is the flag set that newosproc and newosproc0 pass to
	// clone when creating a thread (0x50f00).
	cloneFlags = _CLONE_VM | _CLONE_FS | _CLONE_FILES |
		_CLONE_SIGHAND | _CLONE_SYSVSEM | _CLONE_THREAD
)
159
160
// clone is declared without a body; its implementation is provided outside
// this file. Callers here pass the clone flags, the top of the new thread's
// stack, the m and g to associate with it (nil in newosproc0), and the entry
// function. A negative result is treated as a negated errno by newosproc.
func clone(flags int32, stk, mp, gp, fn unsafe.Pointer) int32
162
163
164
165
166 func newosproc(mp *m) {
167 stk := unsafe.Pointer(mp.g0.stack.hi)
168
171 if false {
172 print("newosproc stk=", stk, " m=", mp, " g=", mp.g0, " clone=", abi.FuncPCABI0(clone), " id=", mp.id, " ostk=", &mp, "\n")
173 }
174
175
176
177 var oset sigset
178 sigprocmask(_SIG_SETMASK, &sigset_all, &oset)
179 ret := clone(cloneFlags, stk, unsafe.Pointer(mp), unsafe.Pointer(mp.g0), unsafe.Pointer(abi.FuncPCABI0(mstart)))
180 sigprocmask(_SIG_SETMASK, &oset, nil)
181
182 if ret < 0 {
183 print("runtime: failed to create new OS thread (have ", mcount(), " already; errno=", -ret, ")\n")
184 if ret == -_EAGAIN {
185 println("runtime: may need to increase max user processes (ulimit -u)")
186 }
187 throw("newosproc")
188 }
189 }
190
191
192
193
194 func newosproc0(stacksize uintptr, fn unsafe.Pointer) {
195 stack := sysAlloc(stacksize, &memstats.stacks_sys)
196 if stack == nil {
197 write(2, unsafe.Pointer(&failallocatestack[0]), int32(len(failallocatestack)))
198 exit(1)
199 }
200 ret := clone(cloneFlags, unsafe.Pointer(uintptr(stack)+stacksize), nil, nil, fn)
201 if ret < 0 {
202 write(2, unsafe.Pointer(&failthreadcreate[0]), int32(len(failthreadcreate)))
203 exit(1)
204 }
205 }
206
// Messages that newosproc0 writes to file descriptor 2 before exiting.
var (
	// failallocatestack is written when the stack allocation fails.
	failallocatestack = []byte("runtime: failed to allocate stack for the new OS thread\n")
	// failthreadcreate is written when clone fails.
	failthreadcreate = []byte("runtime: failed to create new OS thread\n")
)
209
// Auxiliary vector tags examined while walking the auxv in sysauxv.
const (
	_AT_NULL   = 0  // terminates the vector; sysauxv stops at this tag
	_AT_PAGESZ = 6  // value is stored into physPageSize
	_AT_HWCAP  = 16 // not handled in this file; presumably consumed by archauxv — confirm
	_AT_RANDOM = 25 // value is used as the address of 16 bytes of startup random data
	_AT_HWCAP2 = 26 // not handled in this file; presumably consumed by archauxv — confirm
)
217
var (
	// procAuxv is the NUL-terminated path that sysargs opens when the auxv
	// found after the environment yields no entries.
	procAuxv = []byte("/proc/self/auxv\x00")

	// addrspace_vec is the one-byte result buffer passed to mincore by
	// sysargs.
	addrspace_vec [1]byte
)
221
// mincore is declared without a body; its implementation is provided outside
// this file. sysargs calls it with a one-byte length and treats a zero result
// as success.
func mincore(addr unsafe.Pointer, n uintptr, dst *byte) int32
223
// sysargs locates the auxiliary vector and feeds it to sysauxv.
//
// It first looks for the vector in memory after argv: it skips the argc
// arguments and their nil terminator, then every following non-nil pointer
// and the nil after them, and treats what follows as tag/value pairs. This
// assumes the usual initial process stack layout (argv, nil, envp, nil, auxv)
// — TODO confirm for all entry paths.
//
// If that vector yields no entries, it reads /proc/self/auxv instead. If that
// file cannot be opened, it falls back to probing for the physical page size
// with mincore.
func sysargs(argc int32, argv **byte) {
	// Index of the first pointer after argv's nil terminator.
	n := argc + 1

	// Skip the following run of non-nil pointers.
	for argv_index(argv, n) != nil {
		n++
	}

	// Skip the nil that ended that run.
	n++

	// Treat the memory at argv+n as the auxv. The array type is only a
	// bound for slicing; sysauxv stops at the _AT_NULL tag.
	auxv := (*[1 << 28]uintptr)(add(unsafe.Pointer(argv), uintptr(n)*goarch.PtrSize))
	if sysauxv(auxv[:]) != 0 {
		return
	}

	// No entries were found in memory; try the proc file instead.
	fd := open(&procAuxv[0], 0 /* O_RDONLY */, 0)
	if fd < 0 {
		// The file is unavailable. Map a scratch region and call mincore at
		// power-of-two offsets starting at 4 KiB; the first offset for which
		// mincore succeeds is taken as the physical page size.
		// NOTE(review): presumably mincore rejects addresses that are not
		// page-aligned — confirm.
		const size = 256 << 10
		p, err := mmap(nil, size, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_PRIVATE, -1, 0)
		if err != 0 {
			return
		}
		// This n shadows the outer n for the rest of this branch.
		var n uintptr
		for n = 4 << 10; n < size; n <<= 1 {
			err := mincore(unsafe.Pointer(uintptr(p)+n), 1, &addrspace_vec[0])
			if err == 0 {
				physPageSize = n
				break
			}
		}
		// No probe succeeded: fall back to the size of the scratch region.
		if physPageSize == 0 {
			physPageSize = size
		}
		munmap(p, size)
		return
	}
	var buf [128]uintptr
	n = read(fd, noescape(unsafe.Pointer(&buf[0])), int32(unsafe.Sizeof(buf)))
	closefd(fd)
	if n < 0 {
		return
	}

	// Force a terminating tag into the last tag slot so that sysauxv always
	// stops inside buf, even if the read filled the whole buffer.
	buf[len(buf)-2] = _AT_NULL
	sysauxv(buf[:])
}
278
279
280
// startupRandomData is the 16-byte slice that sysauxv points at the
// _AT_RANDOM value. It stays nil if no such tag is seen. getRandomData copies
// from it when it is non-nil.
var startupRandomData []byte
282
283 func sysauxv(auxv []uintptr) int {
284 var i int
285 for ; auxv[i] != _AT_NULL; i += 2 {
286 tag, val := auxv[i], auxv[i+1]
287 switch tag {
288 case _AT_RANDOM:
289
290
291 startupRandomData = (*[16]byte)(unsafe.Pointer(val))[:]
292
293 case _AT_PAGESZ:
294 physPageSize = val
295 }
296
297 archauxv(tag, val)
298 vdsoauxv(tag, val)
299 }
300 return i / 2
301 }
302
// sysTHPSizePath is the NUL-terminated path of the file that getHugePageSize
// reads.
var sysTHPSizePath = []byte("/sys/kernel/mm/transparent_hugepage/hpage_pmd_size\x00")
304
305 func getHugePageSize() uintptr {
306 var numbuf [20]byte
307 fd := open(&sysTHPSizePath[0], 0 , 0)
308 if fd < 0 {
309 return 0
310 }
311 ptr := noescape(unsafe.Pointer(&numbuf[0]))
312 n := read(fd, ptr, int32(len(numbuf)))
313 closefd(fd)
314 if n <= 0 {
315 return 0
316 }
317 n--
318 v, ok := atoi(slicebytetostringtmp((*byte)(ptr), int(n)))
319 if !ok || v < 0 {
320 v = 0
321 }
322 if v&(v-1) != 0 {
323
324 return 0
325 }
326 return uintptr(v)
327 }
328
// osinit initializes ncpu from getproccount and physHugePageSize from
// getHugePageSize, adjusts sigsetAllExiting when cgo is in use, and then runs
// osArchInit.
func osinit() {
	ncpu = getproccount()
	physHugePageSize = getHugePageSize()
	if iscgo {
		// Remove signals 32, 33 and 34 from sigsetAllExiting.
		// NOTE(review): presumably these are signals the C library reserves
		// for its own use — confirm.
		sigdelset(&sigsetAllExiting, 32)
		sigdelset(&sigsetAllExiting, 33)
		sigdelset(&sigsetAllExiting, 34)
	}
	osArchInit()
}
352
// urandom_dev is the NUL-terminated path that getRandomData opens when no
// startup random data is available.
var urandom_dev = []byte("/dev/urandom\x00")
354
355 func getRandomData(r []byte) {
356 if startupRandomData != nil {
357 n := copy(r, startupRandomData)
358 extendRandom(r, n)
359 return
360 }
361 fd := open(&urandom_dev[0], 0 , 0)
362 n := read(fd, unsafe.Pointer(&r[0]), int32(len(r)))
363 closefd(fd)
364 extendRandom(r, int(n))
365 }
366
// goenvs delegates to goenvs_unix.
func goenvs() {
	goenvs_unix()
}
370
371
372
373
374
375
376
// libpreinit calls initsig with its argument set to true.
// NOTE(review): presumably this is the early signal setup used when the
// runtime is built as a library or archive — confirm against callers.
func libpreinit() {
	initsig(true)
}
380
381
382
383 func mpreinit(mp *m) {
384 mp.gsignal = malg(32 * 1024)
385 mp.gsignal.m = mp
386 }
387
// gettid is declared without a body; its implementation is provided outside
// this file. minit stores its result as the current m's procid.
func gettid() uint32
389
390
391
392 func minit() {
393 minitSignals()
394
395
396
397
398 getg().m.procid = uint64(gettid())
399 }
400
401
402
403
// unminit delegates to unminitSignals.
func unminit() {
	unminitSignals()
}
407
408
409
// mdestroy does nothing on Linux; mp is unused.
func mdestroy(mp *m) {
}
412
413
414
415
416
// sigreturn, sigtramp and cgoSigtramp are declared without bodies; their
// implementations are provided outside this file. This file never calls them
// directly: setsig only takes their addresses with abi.FuncPCABI0, using
// sigreturn as the sa_restorer on 386/amd64 and sigtramp or cgoSigtramp as
// the installed handler.
func sigreturn()
func sigtramp()
func cgoSigtramp()
420
421
// The functions below are declared without bodies; their implementations are
// provided outside this file.

// sigaltstack takes a new and an old stackt; it is not called in this file.
func sigaltstack(new, old *stackt)

// setitimer takes a mode and new/old itimerval values; it is not called in
// this file.
func setitimer(mode int32, new, old *itimerval)

// timer_create stores the id of the created timer in *timerid.
// setThreadCPUProfiler treats a non-zero result as failure.
func timer_create(clockid int32, sevp *sigevent, timerid *int32) int32

// timer_settime arms timerid with new. setThreadCPUProfiler treats a non-zero
// result as failure and prints its negation as the errno.
func timer_settime(timerid int32, flags int32, new, old *itimerspec) int32

// timer_delete removes timerid. setThreadCPUProfiler treats a non-zero result
// as failure and prints its negation as the errno.
func timer_delete(timerid int32) int32

// rtsigprocmask is the primitive behind sigprocmask, which supplies size as
// the size in bytes of a sigset.
func rtsigprocmask(how int32, new, old *sigset, size int32)
438
439
440
441 func sigprocmask(how int32, new, old *sigset) {
442 rtsigprocmask(how, new, old, int32(unsafe.Sizeof(*new)))
443 }
444
// The functions below are declared without bodies; their implementations are
// provided outside this file.

// raise and raiseproc take a signal number; neither is called in this file.
func raise(sig uint32)
func raiseproc(sig uint32)

// sched_getaffinity fills buf (of len bytes) with an affinity mask.
// getproccount treats a negative result as failure and otherwise uses the
// result as the number of bytes of buf to examine.
func sched_getaffinity(pid, len uintptr, buf *byte) int32

// osyield is used in this file inside busy-wait loops.
func osyield()
451
452
// osyield_no_g simply calls osyield.
// NOTE(review): the name suggests it must be callable without a g —
// confirm any constraints against callers.
func osyield_no_g() {
	osyield()
}
456
// pipe2 is declared without a body; its implementation is provided outside
// this file. It returns two file descriptors and an errno value; it is not
// called in this file.
func pipe2(flags int32) (r, w int32, errno int32)
458
// Size constants; neither is referenced in this file.
// NOTE(review): presumably the byte sizes of the kernel's siginfo and
// sigevent structures — confirm against the type definitions that use them.
const (
	_si_max_size = 128
	_sigev_max_size = 64
)
463
464
465
466 func setsig(i uint32, fn uintptr) {
467 var sa sigactiont
468 sa.sa_flags = _SA_SIGINFO | _SA_ONSTACK | _SA_RESTORER | _SA_RESTART
469 sigfillset(&sa.sa_mask)
470
471
472
473 if GOARCH == "386" || GOARCH == "amd64" {
474 sa.sa_restorer = abi.FuncPCABI0(sigreturn)
475 }
476 if fn == abi.FuncPCABIInternal(sighandler) {
477 if iscgo {
478 fn = abi.FuncPCABI0(cgoSigtramp)
479 } else {
480 fn = abi.FuncPCABI0(sigtramp)
481 }
482 }
483 sa.sa_handler = fn
484 sigaction(i, &sa, nil)
485 }
486
487
488
489 func setsigstack(i uint32) {
490 var sa sigactiont
491 sigaction(i, nil, &sa)
492 if sa.sa_flags&_SA_ONSTACK != 0 {
493 return
494 }
495 sa.sa_flags |= _SA_ONSTACK
496 sigaction(i, &sa, nil)
497 }
498
499
500
501 func getsig(i uint32) uintptr {
502 var sa sigactiont
503 sigaction(i, nil, &sa)
504 return sa.sa_handler
505 }
506
507
508
509
// setSignalstackSP stores sp into s.ss_sp. The store goes through a *uintptr
// cast of the field's address, so it works whatever type ss_sp is declared
// with, provided it is pointer-sized.
func setSignalstackSP(s *stackt, sp uintptr) {
	*(*uintptr)(unsafe.Pointer(&s.ss_sp)) = sp
}
513
514
// fixsigcode does nothing on Linux; sig is unused.
func (c *sigctxt) fixsigcode(sig uint32) {
}
517
518
519
520
521 func sysSigaction(sig uint32, new, old *sigactiont) {
522 if rt_sigaction(uintptr(sig), new, old, unsafe.Sizeof(sigactiont{}.sa_mask)) != 0 {
523
524
525
526
527
528
529
530
531
532
533
534 if sig != 32 && sig != 33 && sig != 64 {
535
536 systemstack(func() {
537 throw("sigaction failed")
538 })
539 }
540 }
541 }
542
543
544
545
// The functions below are declared without bodies; their implementations are
// provided outside this file.

// rt_sigaction is called by sysSigaction, which treats a non-zero result as
// failure.
func rt_sigaction(sig uintptr, new, old *sigactiont, size uintptr) int32

// getpid and tgkill are used together by signalM to send a signal to one
// thread of the current process.
func getpid() int
func tgkill(tgid, tid, sig int)
550
551
552 func signalM(mp *m, sig int) {
553 tgkill(getpid(), int(mp.procid), sig)
554 }
555
556
// go118UseTimerCreateProfiler gates the per-thread timer_create profiler.
// When it is false, setThreadCPUProfiler only records the requested rate and
// creates no timer.
const go118UseTimerCreateProfiler = true
558
559
560
561
562
563
564
565
566 func validSIGPROF(mp *m, c *sigctxt) bool {
567 code := int32(c.sigcode())
568 setitimer := code == _SI_KERNEL
569 timer_create := code == _SI_TIMER
570
571 if !(setitimer || timer_create) {
572
573
574
575 return true
576 }
577
578 if mp == nil {
579
580
581
582
583
584
585
586
587
588
589
590
591 return setitimer
592 }
593
594
595
596 if atomic.Load(&mp.profileTimerValid) != 0 {
597
598
599
600
601
602 return timer_create
603 }
604
605
606 return setitimer
607 }
608
// setProcessCPUProfiler forwards hz to setProcessCPUProfilerTimer.
func setProcessCPUProfiler(hz int32) {
	setProcessCPUProfilerTimer(hz)
}
612
// setThreadCPUProfiler sets the calling thread's profiling rate to hz and,
// when go118UseTimerCreateProfiler is enabled, (re)creates the thread's
// per-thread profiling timer.
//
// Any existing timer is deleted first. If hz is zero nothing more is done.
// Otherwise a timer on the thread's CPU-time clock is created that delivers
// _SIGPROF to this thread with a period of 1e9/hz nanoseconds. If the timer
// cannot be created, the function returns quietly with profileTimerValid left
// at zero; failures of timer_delete or timer_settime are fatal.
func setThreadCPUProfiler(hz int32) {
	mp := getg().m
	mp.profilehz = hz

	if !go118UseTimerCreateProfiler {
		return
	}

	// Tear down any timer that is currently active. The valid flag is
	// cleared before the timer is deleted.
	if atomic.Load(&mp.profileTimerValid) != 0 {
		timerid := mp.profileTimer
		atomic.Store(&mp.profileTimerValid, 0)
		mp.profileTimer = 0

		ret := timer_delete(timerid)
		if ret != 0 {
			print("runtime: failed to disable profiling timer; timer_delete(", timerid, ") errno=", -ret, "\n")
			throw("timer_delete")
		}
	}

	if hz == 0 {
		// Profiling is being turned off for this thread; nothing to create.
		return
	}

	// The interval is one period (1e9/hz ns). The first expiry is offset by
	// 1 plus a fastrandn value bounded by the period, so that it is never
	// zero.
	// NOTE(review): presumably the random offset spreads timer expiries of
	// different threads apart — confirm.
	spec := new(itimerspec)
	spec.it_value.setNsec(1 + int64(fastrandn(uint32(1e9/hz))))
	spec.it_interval.setNsec(1e9 / int64(hz))

	// Request delivery of _SIGPROF to this specific thread.
	var timerid int32
	var sevp sigevent
	sevp.notify = _SIGEV_THREAD_ID
	sevp.signo = _SIGPROF
	sevp.sigev_notify_thread_id = int32(mp.procid)
	ret := timer_create(_CLOCK_THREAD_CPUTIME_ID, &sevp, &timerid)
	if ret != 0 {
		// No timer for this thread. profileTimerValid stays zero, which
		// makes validSIGPROF accept setitimer-sourced signals here.
		return
	}

	ret = timer_settime(timerid, 0, spec, nil)
	if ret != 0 {
		print("runtime: failed to configure profiling timer; timer_settime(", timerid,
			", 0, {interval: {",
			spec.it_interval.tv_sec, "s + ", spec.it_interval.tv_nsec, "ns} value: {",
			spec.it_value.tv_sec, "s + ", spec.it_value.tv_nsec, "ns}}, nil) errno=", -ret, "\n")
		throw("timer_settime")
	}

	// Publish the timer id before marking it valid.
	mp.profileTimer = timerid
	atomic.Store(&mp.profileTimerValid, 1)
}
686
687
688
// perThreadSyscallArgs describes a system call that every thread must
// execute: the trap number, its six arguments, and the results (r1, r2) that
// the initiating thread obtained and that every other thread is expected to
// reproduce.
type perThreadSyscallArgs struct {
	trap                   uintptr
	a1, a2, a3, a4, a5, a6 uintptr
	r1, r2                 uintptr
}
700
701
702
703
704
705
// perThreadSyscall holds the system call currently being broadcast to all
// threads. syscall_runtime_doAllThreadsSyscall fills it in before signalling
// the other threads and resets it to the zero value once they have all
// finished; runPerThreadSyscall reads it.
var perThreadSyscall perThreadSyscallArgs
707
708
709
710
711
712
713
714
715
// syscall_runtime_doAllThreadsSyscall executes the system call given by trap
// and a1..a6 on the calling thread and then makes every other thread in allm
// execute the same call. It returns the calling thread's results.
//
// If the call fails on the calling thread, it is not attempted on any other
// thread and the error is returned. It panics when cgo is enabled.
//
// NOTE(review): the name suggests this is exposed to package syscall (for
// example via a linkname directive not visible here) — confirm.
func syscall_runtime_doAllThreadsSyscall(trap, a1, a2, a3, a4, a5, a6 uintptr) (r1, r2, err uintptr) {
	if iscgo {
		// NOTE(review): presumably because threads created by C code are not
		// in allm and so could not be reached — confirm.
		panic("doAllThreadsSyscall not supported with cgo enabled")
	}

	// Stop the world for the duration of the operation.
	stopTheWorld("doAllThreadsSyscall")

	// Hold allocmLock until the broadcast is complete.
	// NOTE(review): presumably this prevents new Ms from being allocated
	// while allm is walked, and serializes concurrent callers — confirm.
	allocmLock.lock()

	// Pin this goroutine to its M; the current thread is handled specially
	// below (it is skipped when signalling).
	acquirem()

	// Make the call on this thread first. r1 and r2 are the named results;
	// errno is a new local.
	r1, r2, errno := syscall.Syscall6(trap, a1, a2, a3, a4, a5, a6)
	if GOARCH == "ppc64" || GOARCH == "ppc64le" {
		// r2 is forced to zero on ppc64 so that the comparison in
		// runPerThreadSyscall, which does the same, is consistent.
		r2 = 0
	}
	if errno != 0 {
		// Failed locally: undo the setup in reverse order and report the
		// error without involving other threads.
		releasem(getg().m)
		allocmLock.unlock()
		startTheWorld()
		return r1, r2, errno
	}

	// Publish the call and its expected results for the other threads.
	perThreadSyscall = perThreadSyscallArgs{
		trap: trap,
		a1: a1,
		a2: a2,
		a3: a3,
		a4: a4,
		a5: a5,
		a6: a6,
		r1: r1,
		r2: r2,
	}

	// Wait until every M has a non-zero procid, i.e. has recorded its thread
	// id (minit does this), so that it can be signalled.
	for mp := allm; mp != nil; mp = mp.alllink {
		for atomic.Load64(&mp.procid) == 0 {
			// Still starting up; yield and retry.
			osyield()
		}
	}

	// Flag and signal every thread other than this one.
	gp := getg()
	tid := gp.m.procid
	for mp := allm; mp != nil; mp = mp.alllink {
		if atomic.Load64(&mp.procid) == tid {
			// This thread already made the call above.
			continue
		}
		mp.needPerThreadSyscall.Store(1)
		signalM(mp, sigPerThreadSyscall)
	}

	// Wait for every other thread to clear its flag, which
	// runPerThreadSyscall does after making the call.
	for mp := allm; mp != nil; mp = mp.alllink {
		if mp.procid == tid {
			continue
		}
		for mp.needPerThreadSyscall.Load() != 0 {
			osyield()
		}
	}

	// Clear the published call now that nobody needs it.
	perThreadSyscall = perThreadSyscallArgs{}

	releasem(getg().m)
	allocmLock.unlock()
	startTheWorld()

	return r1, r2, errno
}
862
863
864
865
866
867
868
869 func runPerThreadSyscall() {
870 gp := getg()
871 if gp.m.needPerThreadSyscall.Load() == 0 {
872 return
873 }
874
875 args := perThreadSyscall
876 r1, r2, errno := syscall.Syscall6(args.trap, args.a1, args.a2, args.a3, args.a4, args.a5, args.a6)
877 if GOARCH == "ppc64" || GOARCH == "ppc64le" {
878
879 r2 = 0
880 }
881 if errno != 0 || r1 != args.r1 || r2 != args.r2 {
882 print("trap:", args.trap, ", a123456=[", args.a1, ",", args.a2, ",", args.a3, ",", args.a4, ",", args.a5, ",", args.a6, "]\n")
883 print("results: got {r1=", r1, ",r2=", r2, ",errno=", errno, "}, want {r1=", args.r1, ",r2=", args.r2, ",errno=0\n")
884 fatal("AllThreadsSyscall6 results differ between threads; runtime corrupted")
885 }
886
887 gp.m.needPerThreadSyscall.Store(0)
888 }
889
View as plain text