Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit f5d8533

Browse files
committedDec 11, 2019
failed tentative to undeadlock the backoff
1 parent 9a62e9c commit f5d8533

File tree

2 files changed

+64
-16
lines changed

2 files changed

+64
-16
lines changed
 

‎weave/channels/event_notifiers.nim

+63-16
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,20 @@ import
4848
../config
4949

5050
type
51+
# Need to workaround Enum not being Atomics, pending: https://github.com/nim-lang/Nim/issues/12812
52+
# so we enforce its size and will cast before use
53+
ConsumerState {.size: sizeof(uint8).}= enum
54+
# We need 4 states to solve a race condition, see (https://github.com/mratsim/weave/issues/27)
55+
# A child signals its parent that it goes to sleep via a steal request.
56+
# Its parent tries to wake it up but the child is not sleeping
57+
# The child goes to sleep and system is deadlocked.
58+
# Instead, before signaling via the channel it should also notify
59+
# its intent to sleep, and then commit/rollback its intent once it has sent its message.
60+
Busy
61+
IntendToSleep
62+
Parked
63+
ShouldWakeup
64+
5165
EventNotifier* = object
5266
## Multi Producers, Single Consumer event notification
5367
## This is wait-free for producers and avoid spending time
@@ -61,33 +75,56 @@ type
6175
## See also: binary semaphores, eventcount
6276
## On Windows: ManuallyResetEvent and AutoResetEvent
6377
cond{.align: WV_CacheLinePadding.}: Cond
64-
lock: Lock
65-
waiting: Atomic[bool]
78+
lock: Lock # The lock is never used, it's just there for the condition variable
79+
consumerState: ConsumerState
80+
81+
template load(c: ConsumerState, mo: MemoryOrder): ConsumerState =
82+
cast[ConsumerState](cast[var Atomic[uint8]](c.addr).load(mo))
83+
84+
template store(c: var ConsumerState, val: ConsumerState, mo: MemoryOrder) =
85+
cast[var Atomic[uint8]](c.addr).store(cast[uint8](val), mo)
86+
87+
template compareExchange(dst, expected: var ConsumerState, target: ConsumerState, mo: MemoryOrder): bool =
88+
compareExchange(
89+
cast[var Atomic[uint8]](dst.addr),
90+
cast[var uint8](expected.addr),
91+
cast[uint8](target), mo
92+
)
6693

6794
func initialize*(en: var EventNotifier) =
6895
en.cond.initCond()
6996
en.lock.initLock()
70-
en.waiting.store(false, moRelaxed)
97+
en.consumerState.store(Busy, moRelaxed)
7198

7299
func `=destroy`*(en: var EventNotifier) =
73100
en.cond.deinitCond()
74101
en.lock.deinitLock()
75102

103+
func intendToSleep*(en: var EventNotifier) {.inline.} =
104+
## The consumer intends to sleep soon.
105+
## This must be called before the formal notification
106+
## via a channel.
107+
assert en.consumerState.load(moRelaxed) == Busy
108+
109+
fence(moRelease)
110+
en.consumerState.store(IntendToSleep, moRelaxed)
111+
112+
76113
func wait*(en: var EventNotifier) {.inline.} =
77114
## Wait until we are signaled of an event
78115
## Thread is parked and does not consume CPU resources
79-
assert not en.waiting.load(moRelaxed)
80-
81-
fence(moRelease)
82-
en.waiting.store(true, moRelaxed)
83116

84-
en.lock.acquire()
85-
while en.waiting.load(moRelaxed):
86-
fence(moAcquire)
87-
en.cond.wait(en.lock)
88-
en.lock.release()
117+
var expected = IntendToSleep
118+
if compareExchange(en.consumerState, expected, Parked, moAcquireRelease):
119+
while en.consumerState.load(moRelaxed) == Parked:
120+
# We only used the lock for the condition variable, we protect via atomics otherwise
121+
fence(moAcquire)
122+
en.cond.wait(en.lock)
89123

90-
assert not en.waiting.load(moRelaxed)
124+
# If we failed to sleep or just woke up
125+
# we return to the busy state
126+
fence(moRelease)
127+
en.consumerState.store(Busy, moRelaxed)
91128

92129
# TODO there is a deadlock at the moment as a worker sending a
93130
# WAITING steal request and then actually waiting is not atomic
@@ -97,10 +134,20 @@ func notify*(en: var EventNotifier) {.inline.} =
97134
## Signal a thread that it can be unparked
98135

99136
# No thread waiting, return
100-
if not en.waiting.load(moRelaxed):
137+
let consumerState = en.consumerState.load(moRelaxed)
138+
if consumerState in {Busy, ShouldWakeup}:
101139
fence(moAcquire)
102140
return
103141

104142
fence(moRelease)
105-
en.waiting.store(false, moRelaxed)
106-
en.cond.signal()
143+
en.consumerState.store(ShouldWakeup, moRelaxed)
144+
while true:
145+
# We might signal "ShouldWakeUp" after the consumer check
146+
# and just before it waits so we need to loop the signal
147+
# until it's sure the consumer is back to busy.
148+
fence(moAcquire)
149+
en.cond.signal()
150+
if en.consumerState.load(moAcquire) != Busy:
151+
cpuRelax()
152+
else:
153+
break

‎weave/thieves.nim

+1
Original file line numberDiff line numberDiff line change
@@ -195,6 +195,7 @@ proc lastStealAttemptFailure*(req: sink StealRequest) =
195195
req.state = Waiting
196196
debugTermination:
197197
log("Worker %2d: sends state passively WAITING to its parent worker %d\n", myID(), myWorker().parent)
198+
myParking().intendToSleep()
198199
sendShare(req)
199200
ascertain: not myWorker().isWaiting
200201
myWorker().isWaiting = true

0 commit comments

Comments
 (0)
Please sign in to comment.