Skip to content

Commit

Permalink
runtime: disable use of runnext on wasm
Browse files Browse the repository at this point in the history
When readying a goroutine, the scheduler typically places the readied
goroutine in pp.runnext, which will typically be the next goroutine to
run in the schedule.

In order to prevent a set of ping-pong goroutines from simply switching
back and forth via runnext and starving the rest of the run queue, a
goroutine scheduled via runnext shares a time slice (pp.schedtick) with
the previous goroutine.

sysmon detects "long-running goroutines", which really means Ps using
the same pp.schedtick for too long, and preempts them to allow the rest
of the run queue to run. Thus this avoids starvation via runnext.

However, wasm has no threads, and thus no sysmon. Without sysmon to
preempt, the possibility for starvation returns. Avoid this by disabling
runnext entirely on wasm. This means that readied goroutines always go
on the end of the run queue and thus cannot starve via runnext.

Note that this CL doesn't do anything about single long-running
goroutines. Without sysmon to preempt them, a single goroutine that
fails to yield will starve the run queue indefinitely.

For #65178.

Change-Id: I10859d088776125a2af8c9cd862b6e071da628b5
Cq-Include-Trybots: luci.golang.try:gotip-js-wasm,gotip-wasip1-wasm_wasmtime,gotip-wasip1-wasm_wazero
Reviewed-on: https://go-review.googlesource.com/c/go/+/559798
Auto-Submit: Bryan Mills <[email protected]>
Reviewed-by: Michael Pratt <[email protected]>
LUCI-TryBot-Result: Go LUCI <[email protected]>
  • Loading branch information
prattmic authored and gopherbot committed Feb 27, 2024
1 parent 2b72395 commit a6a5c30
Show file tree
Hide file tree
Showing 3 changed files with 21 additions and 24 deletions.
17 changes: 0 additions & 17 deletions src/net/net_fake.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@ import (
"errors"
"io"
"os"
"runtime"
"sync"
"sync/atomic"
"syscall"
Expand Down Expand Up @@ -517,14 +516,6 @@ func (pq *packetQueue) send(dt *deadlineTimer, b []byte, from sockaddr, block bo
full = pq.full
}

// Before we check dt.expired, yield to other goroutines.
// This may help to prevent starvation of the goroutine that runs the
// deadlineTimer's time.After callback.
//
// TODO(#65178): Remove this when the runtime scheduler no longer starves
// runnable goroutines.
runtime.Gosched()

select {
case <-dt.expired:
return 0, os.ErrDeadlineExceeded
Expand Down Expand Up @@ -576,14 +567,6 @@ func (pq *packetQueue) recvfrom(dt *deadlineTimer, b []byte, wholePacket bool, c
empty = pq.empty
}

// Before we check dt.expired, yield to other goroutines.
// This may help to prevent starvation of the goroutine that runs the
// deadlineTimer's time.After callback.
//
// TODO(#65178): Remove this when the runtime scheduler no longer starves
// runnable goroutines.
runtime.Gosched()

select {
case <-dt.expired:
return 0, nil, os.ErrDeadlineExceeded
Expand Down
23 changes: 21 additions & 2 deletions src/runtime/proc.go
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,7 @@ func main() {
// Allow newproc to start new Ms.
mainStarted = true

if GOARCH != "wasm" { // no threads on wasm yet, so no sysmon
if haveSysmon {
systemstack(func() {
newm(sysmon, nil, -1)
})
Expand Down Expand Up @@ -5941,6 +5941,11 @@ var forcegcperiod int64 = 2 * 60 * 1e9
// golang.org/issue/42515 is needed on NetBSD.
var needSysmonWorkaround bool = false

// haveSysmon indicates whether there is sysmon thread support.
//
// No threads on wasm yet, so no sysmon.
const haveSysmon = GOARCH != "wasm"

// Always runs without a P, so write barriers are not allowed.
//
//go:nowritebarrierrec
Expand Down Expand Up @@ -6121,7 +6126,10 @@ func retake(now int64) uint32 {
s := pp.status
sysretake := false
if s == _Prunning || s == _Psyscall {
// Preempt G if it's running for too long.
// Preempt G if it's running on the same schedtick for
// too long. This could be from a single long-running
// goroutine or a sequence of goroutines run via
// runnext, which share a single schedtick time slice.
t := int64(pp.schedtick)
if int64(pd.schedtick) != t {
pd.schedtick = uint32(t)
Expand Down Expand Up @@ -6634,6 +6642,17 @@ const randomizeScheduler = raceenabled
// If the run queue is full, runnext puts g on the global queue.
// Executed only by the owner P.
func runqput(pp *p, gp *g, next bool) {
if !haveSysmon && next {
// A runnext goroutine shares the same time slice as the
// current goroutine (inheritTime from runqget). To prevent a
// ping-pong pair of goroutines from starving all others, we
// depend on sysmon to preempt "long-running goroutines". That
// is, any set of goroutines sharing the same time slice.
//
// If there is no sysmon, we must avoid runnext entirely or
// risk starvation.
next = false
}
if randomizeScheduler && next && randn(2) == 0 {
next = false
}
Expand Down
5 changes: 0 additions & 5 deletions src/time/sleep_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -111,11 +111,6 @@ func TestAfterFuncStarvation(t *testing.T) {
// the AfterFunc goroutine instead of the runnable channel goroutine.
// However, in https://go.dev/issue/65178 this was observed to live-lock
// on wasip1/wasm and js/wasm after <10000 runs.

if runtime.GOARCH == "wasm" {
testenv.SkipFlaky(t, 65178)
}

defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(1))

var (
Expand Down

0 comments on commit a6a5c30

Please sign in to comment.