Skip to content

Commit 51ec584

Browse files
committed
runtime/internal/atomic: add 386/amd64 And/Or operators
This CL adds the atomic primitives for the And/Or operators on 386 and amd64 (x86-64). It also includes benchmarks for the ops. Note that the race variants of those operators are not yet implemented, since they depend on an upstream LLVM TSan patch as well as on rebuilding the race runtime with x/build/cmd/racebuild. They will come in a separate patch at a later time, once the infrastructure and upstream patches supporting them are ready. See llvm/llvm-project#65695 for the LLVM TSan patch. For [reserved]
1 parent 6bcf176 commit 51ec584

File tree

5 files changed

+262
-2
lines changed

5 files changed

+262
-2
lines changed

src/runtime/internal/atomic/atomic_386.go

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,24 @@ func And(ptr *uint32, val uint32)
7676
//go:noescape
7777
func Or(ptr *uint32, val uint32)
7878

79+
//go:noescape
80+
func And32(ptr *uint32, val uint32) uint32
81+
82+
//go:noescape
83+
func Or32(ptr *uint32, val uint32) uint32
84+
85+
//go:noescape
86+
func And64(ptr *uint64, val uint64) uint64
87+
88+
//go:noescape
89+
func Or64(ptr *uint64, val uint64) uint64
90+
91+
//go:noescape
92+
func Anduintptr(ptr *uintptr, val uintptr) uintptr
93+
94+
//go:noescape
95+
func Oruintptr(ptr *uintptr, val uintptr) uintptr
96+
7997
// NOTE: Do not add atomicxor8 (XOR is not idempotent).
8098

8199
//go:noescape

src/runtime/internal/atomic/atomic_386.s

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -283,3 +283,84 @@ TEXT ·And(SB), NOSPLIT, $0-8
283283
LOCK
284284
ANDL BX, (AX)
285285
RET
286+
287+
// func And32(addr *uint32, v uint32) old uint32
288+
TEXT ·And32(SB), NOSPLIT, $0-12
289+
MOVL ptr+0(FP), BX
290+
MOVL val+4(FP), CX
291+
casloop:
292+
MOVL CX, DX
293+
MOVL (BX), AX
294+
ANDL AX, DX
295+
LOCK
296+
CMPXCHGL DX, (BX)
297+
JNZ casloop
298+
MOVL AX, ret+8(FP)
299+
RET
300+
301+
// func Or32(addr *uint32, v uint32) old uint32
302+
TEXT ·Or32(SB), NOSPLIT, $0-12
303+
MOVL ptr+0(FP), BX
304+
MOVL val+4(FP), CX
305+
casloop:
306+
MOVL CX, DX
307+
MOVL (BX), AX
308+
ORL AX, DX
309+
LOCK
310+
CMPXCHGL DX, (BX)
311+
JNZ casloop
312+
MOVL AX, ret+8(FP)
313+
RET
314+
315+
// func And64(addr *uint64, v uint64) old uint64
316+
TEXT ·And64(SB), NOSPLIT, $0-20
317+
MOVL ptr+0(FP), BP
318+
// DI:SI = v
319+
MOVL val_lo+4(FP), SI
320+
MOVL val_hi+8(FP), DI
321+
// DX:AX = *addr
322+
MOVL 0(BP), AX
323+
MOVL 4(BP), DX
324+
casloop:
325+
// CX:BX = DX:AX (*addr) & DI:SI (mask)
326+
MOVL AX, BX
327+
MOVL DX, CX
328+
ANDL SI, BX
329+
ANDL DI, CX
330+
LOCK
331+
CMPXCHG8B 0(BP)
332+
JNZ casloop
333+
MOVL AX, ret_lo+12(FP)
334+
MOVL DX, ret_hi+16(FP)
335+
RET
336+
337+
338+
// func Or64(addr *uint64, v uint64) old uint64
339+
TEXT ·Or64(SB), NOSPLIT, $0-20
340+
MOVL ptr+0(FP), BP
341+
// DI:SI = v
342+
MOVL val_lo+4(FP), SI
343+
MOVL val_hi+8(FP), DI
344+
// DX:AX = *addr
345+
MOVL 0(BP), AX
346+
MOVL 4(BP), DX
347+
casloop:
348+
// CX:BX = DX:AX (*addr) | DI:SI (mask)
349+
MOVL AX, BX
350+
MOVL DX, CX
351+
ORL SI, BX
352+
ORL DI, CX
353+
LOCK
354+
CMPXCHG8B 0(BP)
355+
JNZ casloop
356+
MOVL AX, ret_lo+12(FP)
357+
MOVL DX, ret_hi+16(FP)
358+
RET
359+
360+
// func Anduintptr(addr *uintptr, v uintptr) old uintptr
361+
TEXT ·Anduintptr(SB), NOSPLIT, $0-12
362+
JMP ·And32(SB)
363+
364+
// func Oruintptr(addr *uintptr, v uintptr) old uintptr
365+
TEXT ·Oruintptr(SB), NOSPLIT, $0-12
366+
JMP ·Or32(SB)

src/runtime/internal/atomic/atomic_amd64.go

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,24 @@ func And(ptr *uint32, val uint32)
8484
//go:noescape
8585
func Or(ptr *uint32, val uint32)
8686

87+
//go:noescape
88+
func And32(ptr *uint32, val uint32) uint32
89+
90+
//go:noescape
91+
func Or32(ptr *uint32, val uint32) uint32
92+
93+
//go:noescape
94+
func And64(ptr *uint64, val uint64) uint64
95+
96+
//go:noescape
97+
func Or64(ptr *uint64, val uint64) uint64
98+
99+
//go:noescape
100+
func Anduintptr(ptr *uintptr, val uintptr) uintptr
101+
102+
//go:noescape
103+
func Oruintptr(ptr *uintptr, val uintptr) uintptr
104+
87105
// NOTE: Do not add atomicxor8 (XOR is not idempotent).
88106

89107
//go:noescape

src/runtime/internal/atomic/atomic_amd64.s

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -223,3 +223,67 @@ TEXT ·And(SB), NOSPLIT, $0-12
223223
LOCK
224224
ANDL BX, (AX)
225225
RET
226+
227+
// func Or32(addr *uint32, v uint32) old uint32
228+
TEXT ·Or32(SB), NOSPLIT, $0-20
229+
MOVQ ptr+0(FP), BX
230+
MOVL val+8(FP), CX
231+
casloop:
232+
MOVL CX, DX
233+
MOVL (BX), AX
234+
ORL AX, DX
235+
LOCK
236+
CMPXCHGL DX, (BX)
237+
JNZ casloop
238+
MOVL AX, ret+16(FP)
239+
RET
240+
241+
// func And32(addr *uint32, v uint32) old uint32
242+
TEXT ·And32(SB), NOSPLIT, $0-20
243+
MOVQ ptr+0(FP), BX
244+
MOVL val+8(FP), CX
245+
casloop:
246+
MOVL CX, DX
247+
MOVL (BX), AX
248+
ANDL AX, DX
249+
LOCK
250+
CMPXCHGL DX, (BX)
251+
JNZ casloop
252+
MOVL AX, ret+16(FP)
253+
RET
254+
255+
// func Or64(addr *uint64, v uint64) old uint64
256+
TEXT ·Or64(SB), NOSPLIT, $0-24
257+
MOVQ ptr+0(FP), BX
258+
MOVQ val+8(FP), CX
259+
casloop:
260+
MOVQ CX, DX
261+
MOVQ (BX), AX
262+
ORQ AX, DX
263+
LOCK
264+
CMPXCHGQ DX, (BX)
265+
JNZ casloop
266+
MOVQ AX, ret+16(FP)
267+
RET
268+
269+
// func And64(addr *uint64, v uint64) old uint64
270+
TEXT ·And64(SB), NOSPLIT, $0-24
271+
MOVQ ptr+0(FP), BX
272+
MOVQ val+8(FP), CX
273+
casloop:
274+
MOVQ CX, DX
275+
MOVQ (BX), AX
276+
ANDQ AX, DX
277+
LOCK
278+
CMPXCHGQ DX, (BX)
279+
JNZ casloop
280+
MOVQ AX, ret+16(FP)
281+
RET
282+
283+
// func Anduintptr(addr *uintptr, v uintptr) old uintptr
284+
TEXT ·Anduintptr(SB), NOSPLIT, $0-24
285+
JMP ·And64(SB)
286+
287+
// func Oruintptr(addr *uintptr, v uintptr) old uintptr
288+
TEXT ·Oruintptr(SB), NOSPLIT, $0-24
289+
JMP ·Or64(SB)

src/runtime/internal/atomic/atomic_andor_test.go

Lines changed: 81 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
1-
//go:build wasm || ppc64 || ppc64le
2-
// +build wasm ppc64 ppc64le
1+
//go:build 386 || amd64 || ppc64 || ppc64le || wasm
32

43
//
54
// Copyright 2023 The Go Authors. All rights reserved.
@@ -168,3 +167,83 @@ func TestOr64(t *testing.T) {
168167
}
169168
}
170169
}
170+
171+
func BenchmarkAnd32(b *testing.B) {
172+
var x [128]uint32 // give x its own cache line
173+
sink = &x
174+
for i := 0; i < b.N; i++ {
175+
atomic.And32(&x[63], uint32(i))
176+
}
177+
}
178+
179+
func BenchmarkAnd32Parallel(b *testing.B) {
180+
var x [128]uint32 // give x its own cache line
181+
sink = &x
182+
b.RunParallel(func(pb *testing.PB) {
183+
i := uint32(0)
184+
for pb.Next() {
185+
atomic.And32(&x[63], i)
186+
i++
187+
}
188+
})
189+
}
190+
191+
func BenchmarkAnd64(b *testing.B) {
192+
var x [128]uint64 // give x its own cache line
193+
sink = &x
194+
for i := 0; i < b.N; i++ {
195+
atomic.And64(&x[63], uint64(i))
196+
}
197+
}
198+
199+
func BenchmarkAnd64Parallel(b *testing.B) {
200+
var x [128]uint64 // give x its own cache line
201+
sink = &x
202+
b.RunParallel(func(pb *testing.PB) {
203+
i := uint64(0)
204+
for pb.Next() {
205+
atomic.And64(&x[63], i)
206+
i++
207+
}
208+
})
209+
}
210+
211+
func BenchmarkOr32(b *testing.B) {
212+
var x [128]uint32 // give x its own cache line
213+
sink = &x
214+
for i := 0; i < b.N; i++ {
215+
atomic.Or32(&x[63], uint32(i))
216+
}
217+
}
218+
219+
func BenchmarkOr32Parallel(b *testing.B) {
220+
var x [128]uint32 // give x its own cache line
221+
sink = &x
222+
b.RunParallel(func(pb *testing.PB) {
223+
i := uint32(0)
224+
for pb.Next() {
225+
atomic.Or32(&x[63], i)
226+
i++
227+
}
228+
})
229+
}
230+
231+
func BenchmarkOr64(b *testing.B) {
232+
var x [128]uint64 // give x its own cache line
233+
sink = &x
234+
for i := 0; i < b.N; i++ {
235+
atomic.Or64(&x[63], uint64(i))
236+
}
237+
}
238+
239+
func BenchmarkOr64Parallel(b *testing.B) {
240+
var x [128]uint64 // give x its own cache line
241+
sink = &x
242+
b.RunParallel(func(pb *testing.PB) {
243+
i := uint64(0)
244+
for pb.Next() {
245+
atomic.Or64(&x[63], i)
246+
i++
247+
}
248+
})
249+
}

0 commit comments

Comments
 (0)