From 9e30e72cb73d6be34b77ccdd4bdaedb03ea33c11 Mon Sep 17 00:00:00 2001 From: Philipp Doerner Date: Tue, 9 Jan 2024 20:01:45 +0100 Subject: [PATCH 01/11] #55 Refactor head and tail counters to atomics Head and Tail on ChannelRaw get accessed in channelSend and channelReceive before a lock acquisition happens in those procs. That means that the output of the template they get used in (isEmpty/isFull) is dependent on a data-race. That is because as a thread that has the lock may modify those values while they are being read by another thrread without the lock. This is noticed by thread-sanitizer and should be eliminated for the following reasons: - Users should not catch "false positives" for thread sanitizier such as this, which they currently will - It is generally a good idea to have multi-threaded code such as this data-race free This refactor to atomics makes the entire thing atomic and thus impossible to have a data-race with. --- threading/channels.nim | 52 ++++++++++++++++++++++++++---------------- 1 file changed, 32 insertions(+), 20 deletions(-) diff --git a/threading/channels.nim b/threading/channels.nim index cbfe50a..ae510e0 100644 --- a/threading/channels.nim +++ b/threading/channels.nim @@ -111,25 +111,37 @@ type lock: Lock spaceAvailableCV, dataAvailableCV: Cond slots: int ## Number of item slots in the buffer - head: int ## Write/enqueue/send index - tail: int ## Read/dequeue/receive index + head: Atomic[int] ## Write/enqueue/send index + tail: Atomic[int] ## Read/dequeue/receive index buffer: ptr UncheckedArray[byte] atomicCounter: Atomic[int] # ------------------------------------------------------------------------------ +func getTail(chan: ChannelRaw, order: MemoryOrder = moRelaxed): int {.inline.} = + chan.tail.load(order) + +func getHead(chan: ChannelRaw, order: MemoryOrder = moRelaxed): int {.inline.} = + chan.head.load(order) + +proc setTail(chan: ChannelRaw, value: int, order: MemoryOrder = moRelaxed) {.inline.} = + chan.tail.store(value, order) + +proc setHead(chan: ChannelRaw, value: int, order: MemoryOrder = moRelaxed) {.inline.} = + chan.head.store(value, order) + func numItems(chan: ChannelRaw): int {.inline.} = - result = chan.head - chan.tail + result = chan.getHead() - chan.getTail() if result < 0: inc(result, 2 * chan.slots) assert result <= chan.slots template isFull(chan: ChannelRaw): bool = - abs(chan.head - chan.tail) == chan.slots + abs(chan.getHead() - chan.getTail()) == chan.slots template isEmpty(chan: ChannelRaw): bool = - chan.head == chan.tail + chan.getHead() == chan.getTail() # Channels memory ops # ------------------------------------------------------------------------------ @@ -145,8 +157,8 @@ proc allocChannel(size, n: int): ChannelRaw = initCond(result.dataAvailableCV) result.slots = n - result.head = 0 - result.tail = 0 + result.setHead(0) + result.setTail(0) result.atomicCounter.store(0, moRelaxed) @@ -186,16 +198,16 @@ proc channelSend(chan: ChannelRaw, data: pointer, size: int, blocking: static bo assert not chan.isFull() - let writeIdx = if chan.head < chan.slots: - chan.head + let writeIdx = if chan.getHead() < chan.slots: + chan.getHead() else: - chan.head - chan.slots + chan.getHead() - chan.slots copyMem(chan.buffer[writeIdx * size].addr, data, size) - - inc(chan.head) - if chan.head == 2 * chan.slots: - chan.head = 0 + + atomicInc(chan.head) + if chan.getHead() == 2 * chan.slots: + chan.setHead(0) signal(chan.dataAvailableCV) release(chan.lock) @@ -221,16 +233,16 @@ proc channelReceive(chan: ChannelRaw, data: pointer, size: int, blocking: static assert not chan.isEmpty() - let readIdx = if chan.tail < chan.slots: - chan.tail + let readIdx = if chan.getTail() < chan.slots: + chan.getTail() else: - chan.tail - chan.slots + chan.getTail() - chan.slots copyMem(data, chan.buffer[readIdx * size].addr, size) - inc(chan.tail) - if chan.tail == 2 * chan.slots: - chan.tail = 0 + atomicInc(chan.tail) + if chan.getTail() == 2 * chan.slots: + chan.setTail(0) signal(chan.spaceAvailableCV) release(chan.lock) From 5eb968428d6b99005bb0c386ca7f1f556f99a72c Mon Sep 17 00:00:00 2001 From: Philipp Doerner Date: Tue, 9 Jan 2024 20:02:41 +0100 Subject: [PATCH 02/11] #55 Refactor access to atomicCounter Tail and head are atomics and received getter/setter procs for better readability. For consistency, this was now also done for the atomicCounter field. --- threading/channels.nim | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/threading/channels.nim b/threading/channels.nim index ae510e0..a4f99b6 100644 --- a/threading/channels.nim +++ b/threading/channels.nim @@ -130,6 +130,12 @@ proc setTail(chan: ChannelRaw, value: int, order: MemoryOrder = moRelaxed) {.inl proc setHead(chan: ChannelRaw, value: int, order: MemoryOrder = moRelaxed) {.inline.} = chan.head.store(value, order) +func getAtomicCounter(chan: ChannelRaw, order: MemoryOrder = moRelaxed): int {.inline.} = + chan.atomicCounter.load(order) + +proc setAtomicCounter(chan: ChannelRaw, value: int, order: MemoryOrder = moRelaxed) {.inline.} = + chan.atomicCounter.store(value, order) + func numItems(chan: ChannelRaw): int {.inline.} = result = chan.getHead() - chan.getTail() if result < 0: @@ -159,7 +165,7 @@ proc allocChannel(size, n: int): ChannelRaw = result.slots = n result.setHead(0) result.setTail(0) - result.atomicCounter.store(0, moRelaxed) + result.setAtomicCounter(0) proc freeChannel(chan: ChannelRaw) = @@ -258,7 +264,7 @@ type when defined(nimAllowNonVarDestructor): proc `=destroy`*[T](c: Chan[T]) = if c.d != nil: - if load(c.d.atomicCounter, moAcquire) == 0: + if c.d.getAtomicCounter(moAcquire) == 0: if c.d.buffer != nil: freeChannel(c.d) else: @@ -266,7 +272,7 @@ when defined(nimAllowNonVarDestructor): else: proc `=destroy`*[T](c: var Chan[T]) = if c.d != nil: - if load(c.d.atomicCounter, moAcquire) == 0: + if c.d.getAtomicCounter(moAcquire) == 0: if c.d.buffer != nil: freeChannel(c.d) else: From 80bd5e4cfd204735ff46525d62f985c753a2858f Mon Sep 17 00:00:00 2001 From: Philipp Doerner Date: Tue, 9 Jan 2024 21:21:26 +0100 Subject: [PATCH 03/11] #55 Add thread sanitization check to tests This allows spotting data-races in the threading code automatically --- tests/nim.cfg | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/nim.cfg b/tests/nim.cfg index 23f43b3..2b76a1e 100644 --- a/tests/nim.cfg +++ b/tests/nim.cfg @@ -1,3 +1,7 @@ --path:"../" --gc:arc --threads:on +--cc:clang +--debugger:native +--passc:"-fsanitize=thread -fno-omit-frame-pointer -mno-omit-leaf-frame-pointer" +--passl:"-fsanitize=thread -fno-omit-frame-pointer -mno-omit-leaf-frame-pointer" \ No newline at end of file From 92cd319a603b393b08a841088e35406e486683b9 Mon Sep 17 00:00:00 2001 From: Philipp Doerner Date: Wed, 10 Jan 2024 17:10:46 +0100 Subject: [PATCH 04/11] #55 Remove data-races caused by nim allocator --- tests/nim.cfg | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/nim.cfg b/tests/nim.cfg index 2b76a1e..831857f 100644 --- a/tests/nim.cfg +++ b/tests/nim.cfg @@ -2,6 +2,7 @@ --gc:arc --threads:on --cc:clang +--define:useMalloc --debugger:native --passc:"-fsanitize=thread -fno-omit-frame-pointer -mno-omit-leaf-frame-pointer" --passl:"-fsanitize=thread -fno-omit-frame-pointer -mno-omit-leaf-frame-pointer" \ No newline at end of file From 4999602ac034d097acda83b4a8d168a83ddb090f Mon Sep 17 00:00:00 2001 From: Philipp Doerner Date: Wed, 10 Jan 2024 17:22:43 +0100 Subject: [PATCH 05/11] #55 Remove smart-ptr data-race Using the memory order "Release" here enables a data-race according to tsan. The data-race being *potentially* a false-positive. The race it identifies is between 2 threads both trying to destroy the smartPtr at the same time. They'll both call the decr proc in smartptrs.nim:92 and get past the nil check. Then one of them might get past the second if check and deallocate the ptr. However, that should be impossible to lead to a data race. The count of 0 can only be reached *once* if you're the last thread. I can only assume compiler-order-reshuffling may enable a race here. Using the default mode (SeqCst) as well as Acquire Release (AcqRel) gets rid of said data-race. --- threading/smartptrs.nim | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/threading/smartptrs.nim b/threading/smartptrs.nim index 6a077f5..fb62540 100644 --- a/threading/smartptrs.nim +++ b/threading/smartptrs.nim @@ -93,7 +93,7 @@ proc decr[T](p: SharedPtr[T]) {.inline.} = if p.val != nil: # this `fetchSub` returns current val then subs # so count == 0 means we're the last - if p.val.counter.fetchSub(1, Release) == 0: + if p.val.counter.fetchSub(1, AcqRel) == 0: `=destroy`(p.val.value) deallocShared(p.val) @@ -119,7 +119,7 @@ proc newSharedPtr*[T](val: sink Isolated[T]): SharedPtr[T] {.nodestroy.} = ## Returns a shared pointer which shares ## ownership of the object by reference counting. result.val = cast[typeof(result.val)](allocShared(sizeof(result.val[]))) - int(result.val.counter) = 0 + result.val.counter.store(0) result.val.value = extract val template newSharedPtr*[T](val: T): SharedPtr[T] = From 4c38e9dabfc2c5dcaf9e4b8c0e49fdf54b47c1c7 Mon Sep 17 00:00:00 2001 From: Philipp Doerner Date: Wed, 10 Jan 2024 17:29:43 +0100 Subject: [PATCH 06/11] #55 add test binaries to gitignore --- .gitignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.gitignore b/.gitignore index 32e1dc0..dc17cc5 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,5 @@ nimcache/ nimblecache/ htmldocs/ +tests/tsmartptrsleak +tests/tchannels_simple From 06f409aa524abe3532c3a1cb458d2148d7f4b9d6 Mon Sep 17 00:00:00 2001 From: Philipp Doerner Date: Wed, 10 Jan 2024 18:47:39 +0100 Subject: [PATCH 07/11] #55 Remove tsan for windows Clang does not support tsan for windows. The checks for linux and macOs should suffice. --- tests/nim.cfg | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tests/nim.cfg b/tests/nim.cfg index 831857f..8e3e891 100644 --- a/tests/nim.cfg +++ b/tests/nim.cfg @@ -3,6 +3,8 @@ --threads:on --cc:clang --define:useMalloc ---debugger:native ---passc:"-fsanitize=thread -fno-omit-frame-pointer -mno-omit-leaf-frame-pointer" ---passl:"-fsanitize=thread -fno-omit-frame-pointer -mno-omit-leaf-frame-pointer" \ No newline at end of file +@if not windows: + --debugger:native + --passc:"-fsanitize=thread -fno-omit-frame-pointer -mno-omit-leaf-frame-pointer" + --passl:"-fsanitize=thread -fno-omit-frame-pointer -mno-omit-leaf-frame-pointer" +@end \ No newline at end of file From b0d4527abf22de070060dfa5b8dead14aad76a03 Mon Sep 17 00:00:00 2001 From: Philipp Doerner Date: Wed, 10 Jan 2024 18:54:03 +0100 Subject: [PATCH 08/11] #55 Don't use clang for windows --- tests/nim.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/nim.cfg b/tests/nim.cfg index 8e3e891..4f61e49 100644 --- a/tests/nim.cfg +++ b/tests/nim.cfg @@ -1,9 +1,9 @@ --path:"../" --gc:arc --threads:on ---cc:clang --define:useMalloc @if not windows: + --cc:clang --debugger:native --passc:"-fsanitize=thread -fno-omit-frame-pointer -mno-omit-leaf-frame-pointer" --passl:"-fsanitize=thread -fno-omit-frame-pointer -mno-omit-leaf-frame-pointer" From b37d49af67bf67cf9e480295bb1f556e35aa6d07 Mon Sep 17 00:00:00 2001 From: Philipp Doerner Date: Wed, 10 Jan 2024 19:16:15 +0100 Subject: [PATCH 09/11] #55 Attempt to use clang on windows with special case for i386 i386 is a 32bit platform. Clang by default is 64bit. It should be as simple as just requiring clang to build in 32bit mode with -m32. --- tests/nim.cfg | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tests/nim.cfg b/tests/nim.cfg index 4f61e49..b65e936 100644 --- a/tests/nim.cfg +++ b/tests/nim.cfg @@ -2,9 +2,13 @@ --gc:arc --threads:on --define:useMalloc +--cc:clang @if not windows: - --cc:clang --debugger:native --passc:"-fsanitize=thread -fno-omit-frame-pointer -mno-omit-leaf-frame-pointer" --passl:"-fsanitize=thread -fno-omit-frame-pointer -mno-omit-leaf-frame-pointer" +@end +@if windows and i386: + --passc:"-m32" + --passl:"-m32" @end \ No newline at end of file From 172a558a194505294d6724c005f35272279e30d9 Mon Sep 17 00:00:00 2001 From: Andreas Rumpf Date: Thu, 11 Jan 2024 08:00:22 +0100 Subject: [PATCH 10/11] Apply suggestions from code review --- threading/channels.nim | 1 - 1 file changed, 1 deletion(-) diff --git a/threading/channels.nim b/threading/channels.nim index a4f99b6..fd826e7 100644 --- a/threading/channels.nim +++ b/threading/channels.nim @@ -210,7 +210,6 @@ proc channelSend(chan: ChannelRaw, data: pointer, size: int, blocking: static bo chan.getHead() - chan.slots copyMem(chan.buffer[writeIdx * size].addr, data, size) - atomicInc(chan.head) if chan.getHead() == 2 * chan.slots: chan.setHead(0) From 983d13affa08700f6b6269d0c923f51c06d5944c Mon Sep 17 00:00:00 2001 From: Philipp Doerner Date: Fri, 12 Jan 2024 17:28:49 +0100 Subject: [PATCH 11/11] #55 Migrate to use threadings own atomics --- threading/channels.nim | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/threading/channels.nim b/threading/channels.nim index a4f99b6..223374f 100644 --- a/threading/channels.nim +++ b/threading/channels.nim @@ -99,7 +99,8 @@ runnableExamples("--threads:on --gc:orc"): when not defined(gcArc) and not defined(gcOrc) and not defined(nimdoc): {.error: "This channel implementation requires --gc:arc or --gc:orc".} -import std/[locks, atomics, isolation] +import std/[locks, isolation] +import ./atomics import system/ansi_c # Channel @@ -118,22 +119,22 @@ type # ------------------------------------------------------------------------------ -func getTail(chan: ChannelRaw, order: MemoryOrder = moRelaxed): int {.inline.} = +func getTail(chan: ChannelRaw, order: Ordering = Relaxed): int {.inline.} = chan.tail.load(order) -func getHead(chan: ChannelRaw, order: MemoryOrder = moRelaxed): int {.inline.} = +func getHead(chan: ChannelRaw, order: Ordering = Relaxed): int {.inline.} = chan.head.load(order) -proc setTail(chan: ChannelRaw, value: int, order: MemoryOrder = moRelaxed) {.inline.} = +proc setTail(chan: ChannelRaw, value: int, order: Ordering = Relaxed) {.inline.} = chan.tail.store(value, order) -proc setHead(chan: ChannelRaw, value: int, order: MemoryOrder = moRelaxed) {.inline.} = +proc setHead(chan: ChannelRaw, value: int, order: Ordering = Relaxed) {.inline.} = chan.head.store(value, order) -func getAtomicCounter(chan: ChannelRaw, order: MemoryOrder = moRelaxed): int {.inline.} = +func getAtomicCounter(chan: ChannelRaw, order: Ordering = Relaxed): int {.inline.} = chan.atomicCounter.load(order) -proc setAtomicCounter(chan: ChannelRaw, value: int, order: MemoryOrder = moRelaxed) {.inline.} = +proc setAtomicCounter(chan: ChannelRaw, value: int, order: Ordering = Relaxed) {.inline.} = chan.atomicCounter.store(value, order) func numItems(chan: ChannelRaw): int {.inline.} = @@ -264,7 +265,7 @@ type when defined(nimAllowNonVarDestructor): proc `=destroy`*[T](c: Chan[T]) = if c.d != nil: - if c.d.getAtomicCounter(moAcquire) == 0: + if c.d.getAtomicCounter(Acquire) == 0: if c.d.buffer != nil: freeChannel(c.d) else: @@ -272,7 +273,7 @@ when defined(nimAllowNonVarDestructor): else: proc `=destroy`*[T](c: var Chan[T]) = if c.d != nil: - if c.d.getAtomicCounter(moAcquire) == 0: + if c.d.getAtomicCounter(Acquire) == 0: if c.d.buffer != nil: freeChannel(c.d) else: