Skip to content

Commit 36f3ec5

Browse files
mauri870 authored and randall77 committed
runtime/internal/atomic: add 386/amd64 And/Or operators
This CL adds the atomic primitives for the And/Or operators on x86-64.
It also includes missing benchmarks for the ops.

For #61395

Change-Id: I23ef5192866d21fc3a479d0159edeafc3aeb5c47
GitHub-Last-Rev: df800be
GitHub-Pull-Request: #62621
Reviewed-on: https://go-review.googlesource.com/c/go/+/528315
Reviewed-by: Keith Randall <[email protected]>
Reviewed-by: Matthew Dempsky <[email protected]>
TryBot-Result: Gopher Robot <[email protected]>
Reviewed-by: Keith Randall <[email protected]>
Run-TryBot: Mauri de Souza Meneguzzo <[email protected]>
1 parent 954a963 commit 36f3ec5

File tree

5 files changed

+262
-1
lines changed

5 files changed

+262
-1
lines changed

src/runtime/internal/atomic/atomic_386.go

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,24 @@ func And(ptr *uint32, val uint32)
7676
// Or is declared without a Go body; its implementation is not visible in
// this excerpt.
// NOTE(review): presumably the in-place atomic OR counterpart of And with no
// return value — confirm against the assembly.
//
//go:noescape
7777
func Or(ptr *uint32, val uint32)
7878

79+
// And32 atomically replaces *ptr with *ptr & val and returns the value
// *ptr held immediately before the update. It is implemented in assembly.
//
//go:noescape
80+
func And32(ptr *uint32, val uint32) uint32
81+
82+
// Or32 atomically replaces *ptr with *ptr | val and returns the value
// *ptr held immediately before the update. It is implemented in assembly.
//
//go:noescape
83+
func Or32(ptr *uint32, val uint32) uint32
84+
85+
// And64 atomically replaces *ptr with *ptr & val and returns the 64-bit
// value *ptr held immediately before the update. It is implemented in
// assembly.
//
//go:noescape
86+
func And64(ptr *uint64, val uint64) uint64
87+
88+
// Or64 atomically replaces *ptr with *ptr | val and returns the 64-bit
// value *ptr held immediately before the update. It is implemented in
// assembly.
//
//go:noescape
89+
func Or64(ptr *uint64, val uint64) uint64
90+
91+
// Anduintptr atomically replaces *ptr with *ptr & val and returns the
// previous value. On 386 the assembly stub jumps straight to And32.
//
//go:noescape
92+
func Anduintptr(ptr *uintptr, val uintptr) uintptr
93+
94+
// Oruintptr atomically replaces *ptr with *ptr | val and returns the
// previous value. On 386 the assembly stub jumps straight to Or32.
//
//go:noescape
95+
func Oruintptr(ptr *uintptr, val uintptr) uintptr
96+
7997
// NOTE: Do not add atomicxor8 (XOR is not idempotent).
8098

8199
//go:noescape

src/runtime/internal/atomic/atomic_386.s

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -283,3 +283,84 @@ TEXT ·And(SB), NOSPLIT, $0-8
283283
LOCK
284284
ANDL BX, (AX)
285285
RET
286+
287+
// func And32(ptr *uint32, val uint32) old uint32
//
// And32 atomically stores *ptr & val into *ptr and returns the value that
// was in *ptr beforehand. It loops on LOCK CMPXCHGL until the word is
// swapped without another writer changing it in between.
288+
TEXT ·And32(SB), NOSPLIT, $0-12
289+
MOVL ptr+0(FP), BX
290+
MOVL val+4(FP), CX
291+
casloop:
292+
MOVL CX, DX // DX = val; rebuilt on every attempt because ANDL overwrites it
293+
MOVL (BX), AX // AX = current *ptr, the value CMPXCHGL expects to find
294+
ANDL AX, DX // DX = *ptr & val, the new value to install
295+
LOCK
296+
CMPXCHGL DX, (BX) // if *ptr == AX: *ptr = DX and ZF is set; otherwise ZF is cleared
297+
JNZ casloop // *ptr changed underneath us: recompute and retry
298+
MOVL AX, ret+8(FP) // AX still holds the pre-update value
299+
RET
300+
301+
// func Or32(ptr *uint32, val uint32) old uint32
//
// Or32 atomically stores *ptr | val into *ptr and returns the value that
// was in *ptr beforehand. It loops on LOCK CMPXCHGL until the word is
// swapped without another writer changing it in between.
302+
TEXT ·Or32(SB), NOSPLIT, $0-12
303+
MOVL ptr+0(FP), BX
304+
MOVL val+4(FP), CX
305+
casloop:
306+
MOVL CX, DX // DX = val; rebuilt on every attempt because ORL overwrites it
307+
MOVL (BX), AX // AX = current *ptr, the value CMPXCHGL expects to find
308+
ORL AX, DX // DX = *ptr | val, the new value to install
309+
LOCK
310+
CMPXCHGL DX, (BX) // if *ptr == AX: *ptr = DX and ZF is set; otherwise ZF is cleared
311+
JNZ casloop // *ptr changed underneath us: recompute and retry
312+
MOVL AX, ret+8(FP) // AX still holds the pre-update value
313+
RET
314+
315+
// func And64(ptr *uint64, val uint64) old uint64
//
// And64 atomically stores *ptr & val into the 64-bit word at ptr and
// returns the value that was there beforehand, using a LOCK CMPXCHG8B
// retry loop (386 has no single-instruction 64-bit AND on memory).
//
// NOTE(review): ptr is not checked for 8-byte alignment in this routine —
// confirm that callers guarantee it.
316+
TEXT ·And64(SB), NOSPLIT, $0-20
317+
MOVL ptr+0(FP), BP
318+
// DI:SI = val
319+
MOVL val_lo+4(FP), SI
320+
MOVL val_hi+8(FP), DI
321+
// DX:AX = *ptr
322+
// The two halves are read with separate loads; if they are inconsistent
// the first CMPXCHG8B simply fails and refreshes DX:AX.
MOVL 0(BP), AX
323+
MOVL 4(BP), DX
324+
casloop:
325+
// CX:BX = DX:AX (*ptr) & DI:SI (mask)
326+
MOVL AX, BX
327+
MOVL DX, CX
328+
ANDL SI, BX
329+
ANDL DI, CX
330+
LOCK
331+
CMPXCHG8B 0(BP) // if *ptr == DX:AX: *ptr = CX:BX, ZF set; else DX:AX = *ptr, ZF cleared
332+
JNZ casloop // DX:AX already holds the fresh value, so no reload is needed
333+
MOVL AX, ret_lo+12(FP) // return the pre-update value, low half
334+
MOVL DX, ret_hi+16(FP) // ... and high half
335+
RET
336+
337+
338+
// func Or64(ptr *uint64, val uint64) old uint64
//
// Or64 atomically stores *ptr | val into the 64-bit word at ptr and
// returns the value that was there beforehand, using a LOCK CMPXCHG8B
// retry loop (386 has no single-instruction 64-bit OR on memory).
//
// NOTE(review): ptr is not checked for 8-byte alignment in this routine —
// confirm that callers guarantee it.
339+
TEXT ·Or64(SB), NOSPLIT, $0-20
340+
MOVL ptr+0(FP), BP
341+
// DI:SI = val
342+
MOVL val_lo+4(FP), SI
343+
MOVL val_hi+8(FP), DI
344+
// DX:AX = *ptr
345+
// The two halves are read with separate loads; if they are inconsistent
// the first CMPXCHG8B simply fails and refreshes DX:AX.
MOVL 0(BP), AX
346+
MOVL 4(BP), DX
347+
casloop:
348+
// CX:BX = DX:AX (*ptr) | DI:SI (mask)
349+
MOVL AX, BX
350+
MOVL DX, CX
351+
ORL SI, BX
352+
ORL DI, CX
353+
LOCK
354+
CMPXCHG8B 0(BP) // if *ptr == DX:AX: *ptr = CX:BX, ZF set; else DX:AX = *ptr, ZF cleared
355+
JNZ casloop // DX:AX already holds the fresh value, so no reload is needed
356+
MOVL AX, ret_lo+12(FP) // return the pre-update value, low half
357+
MOVL DX, ret_hi+16(FP) // ... and high half
358+
RET
359+
360+
// func Anduintptr(ptr *uintptr, val uintptr) old uintptr
//
// Anduintptr tail-jumps to And32. Both routines declare the same $0-12
// frame, so And32 reads the arguments and writes the result in place.
361+
TEXT ·Anduintptr(SB), NOSPLIT, $0-12
362+
JMP ·And32(SB)
363+
364+
// func Oruintptr(ptr *uintptr, val uintptr) old uintptr
//
// Oruintptr tail-jumps to Or32. Both routines declare the same $0-12
// frame, so Or32 reads the arguments and writes the result in place.
365+
TEXT ·Oruintptr(SB), NOSPLIT, $0-12
366+
JMP ·Or32(SB)

src/runtime/internal/atomic/atomic_amd64.go

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,24 @@ func And(ptr *uint32, val uint32)
8484
// Or is declared without a Go body; its implementation is not visible in
// this excerpt.
// NOTE(review): presumably the in-place atomic OR counterpart of And with no
// return value — confirm against the assembly.
//
//go:noescape
8585
func Or(ptr *uint32, val uint32)
8686

87+
// And32 atomically replaces *ptr with *ptr & val and returns the value
// *ptr held immediately before the update. It is implemented in assembly.
//
//go:noescape
88+
func And32(ptr *uint32, val uint32) uint32
89+
90+
// Or32 atomically replaces *ptr with *ptr | val and returns the value
// *ptr held immediately before the update. It is implemented in assembly.
//
//go:noescape
91+
func Or32(ptr *uint32, val uint32) uint32
92+
93+
// And64 atomically replaces *ptr with *ptr & val and returns the 64-bit
// value *ptr held immediately before the update. It is implemented in
// assembly.
//
//go:noescape
94+
func And64(ptr *uint64, val uint64) uint64
95+
96+
// Or64 atomically replaces *ptr with *ptr | val and returns the 64-bit
// value *ptr held immediately before the update. It is implemented in
// assembly.
//
//go:noescape
97+
func Or64(ptr *uint64, val uint64) uint64
98+
99+
// Anduintptr atomically replaces *ptr with *ptr & val and returns the
// previous value. On amd64 the assembly stub jumps straight to And64.
//
//go:noescape
100+
func Anduintptr(ptr *uintptr, val uintptr) uintptr
101+
102+
// Oruintptr atomically replaces *ptr with *ptr | val and returns the
// previous value. On amd64 the assembly stub jumps straight to Or64.
//
//go:noescape
103+
func Oruintptr(ptr *uintptr, val uintptr) uintptr
104+
87105
// NOTE: Do not add atomicxor8 (XOR is not idempotent).
88106

89107
//go:noescape

src/runtime/internal/atomic/atomic_amd64.s

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -223,3 +223,67 @@ TEXT ·And(SB), NOSPLIT, $0-12
223223
LOCK
224224
ANDL BX, (AX)
225225
RET
226+
227+
// func Or32(ptr *uint32, val uint32) old uint32
//
// Or32 atomically stores *ptr | val into *ptr and returns the value that
// was in *ptr beforehand. It loops on LOCK CMPXCHGL until the word is
// swapped without another writer changing it in between.
228+
TEXT ·Or32(SB), NOSPLIT, $0-20
229+
MOVQ ptr+0(FP), BX
230+
MOVL val+8(FP), CX
231+
casloop:
232+
MOVL CX, DX // DX = val; rebuilt on every attempt because ORL overwrites it
233+
MOVL (BX), AX // AX = current *ptr, the value CMPXCHGL expects to find
234+
ORL AX, DX // DX = *ptr | val, the new value to install
235+
LOCK
236+
CMPXCHGL DX, (BX) // if *ptr == AX: *ptr = DX and ZF is set; otherwise ZF is cleared
237+
JNZ casloop // *ptr changed underneath us: recompute and retry
238+
MOVL AX, ret+16(FP) // AX still holds the pre-update value
239+
RET
240+
241+
// func And32(ptr *uint32, val uint32) old uint32
//
// And32 atomically stores *ptr & val into *ptr and returns the value that
// was in *ptr beforehand. It loops on LOCK CMPXCHGL until the word is
// swapped without another writer changing it in between.
242+
TEXT ·And32(SB), NOSPLIT, $0-20
243+
MOVQ ptr+0(FP), BX
244+
MOVL val+8(FP), CX
245+
casloop:
246+
MOVL CX, DX // DX = val; rebuilt on every attempt because ANDL overwrites it
247+
MOVL (BX), AX // AX = current *ptr, the value CMPXCHGL expects to find
248+
ANDL AX, DX // DX = *ptr & val, the new value to install
249+
LOCK
250+
CMPXCHGL DX, (BX) // if *ptr == AX: *ptr = DX and ZF is set; otherwise ZF is cleared
251+
JNZ casloop // *ptr changed underneath us: recompute and retry
252+
MOVL AX, ret+16(FP) // AX still holds the pre-update value
253+
RET
254+
255+
// func Or64(ptr *uint64, val uint64) old uint64
//
// Or64 atomically stores *ptr | val into *ptr and returns the value that
// was in *ptr beforehand. It loops on LOCK CMPXCHGQ until the word is
// swapped without another writer changing it in between.
256+
TEXT ·Or64(SB), NOSPLIT, $0-24
257+
MOVQ ptr+0(FP), BX
258+
MOVQ val+8(FP), CX
259+
casloop:
260+
MOVQ CX, DX // DX = val; rebuilt on every attempt because ORQ overwrites it
261+
MOVQ (BX), AX // AX = current *ptr, the value CMPXCHGQ expects to find
262+
ORQ AX, DX // DX = *ptr | val, the new value to install
263+
LOCK
264+
CMPXCHGQ DX, (BX) // if *ptr == AX: *ptr = DX and ZF is set; otherwise ZF is cleared
265+
JNZ casloop // *ptr changed underneath us: recompute and retry
266+
MOVQ AX, ret+16(FP) // AX still holds the pre-update value
267+
RET
268+
269+
// func And64(ptr *uint64, val uint64) old uint64
//
// And64 atomically stores *ptr & val into *ptr and returns the value that
// was in *ptr beforehand. It loops on LOCK CMPXCHGQ until the word is
// swapped without another writer changing it in between.
270+
TEXT ·And64(SB), NOSPLIT, $0-24
271+
MOVQ ptr+0(FP), BX
272+
MOVQ val+8(FP), CX
273+
casloop:
274+
MOVQ CX, DX // DX = val; rebuilt on every attempt because ANDQ overwrites it
275+
MOVQ (BX), AX // AX = current *ptr, the value CMPXCHGQ expects to find
276+
ANDQ AX, DX // DX = *ptr & val, the new value to install
277+
LOCK
278+
CMPXCHGQ DX, (BX) // if *ptr == AX: *ptr = DX and ZF is set; otherwise ZF is cleared
279+
JNZ casloop // *ptr changed underneath us: recompute and retry
280+
MOVQ AX, ret+16(FP) // AX still holds the pre-update value
281+
RET
282+
283+
// func Anduintptr(ptr *uintptr, val uintptr) old uintptr
//
// Anduintptr tail-jumps to And64. Both routines declare the same $0-24
// frame, so And64 reads the arguments and writes the result in place.
284+
TEXT ·Anduintptr(SB), NOSPLIT, $0-24
285+
JMP ·And64(SB)
286+
287+
// func Oruintptr(ptr *uintptr, val uintptr) old uintptr
//
// Oruintptr tail-jumps to Or64. Both routines declare the same $0-24
// frame, so Or64 reads the arguments and writes the result in place.
288+
TEXT ·Oruintptr(SB), NOSPLIT, $0-24
289+
JMP ·Or64(SB)

src/runtime/internal/atomic/atomic_andor_test.go

Lines changed: 81 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
//go:build ppc64 || ppc64le || riscv64 || wasm
1+
//go:build 386 || amd64 || ppc64 || ppc64le || riscv64 || wasm
22

33
//
44
// Copyright 2023 The Go Authors. All rights reserved.
@@ -167,3 +167,83 @@ func TestOr64(t *testing.T) {
167167
}
168168
}
169169
}
170+
171+
// BenchmarkAnd32 times b.N back-to-back atomic.And32 calls from a single
// goroutine. Every call targets x[63] and uses the loop counter as the mask;
// the returned old value is discarded.
// NOTE(review): sink is declared outside this excerpt; storing &x in it
// presumably keeps x from being optimized away — confirm.
func BenchmarkAnd32(b *testing.B) {
172+
var x [128]uint32 // give x its own cache line
173+
sink = &x
174+
for i := 0; i < b.N; i++ {
175+
atomic.And32(&x[63], uint32(i))
176+
}
177+
}
178+
179+
// BenchmarkAnd32Parallel times atomic.And32 under contention: every
// goroutine started by b.RunParallel hits the same word, x[63], using its own
// incrementing counter as the mask.
// NOTE(review): sink is declared outside this excerpt; storing &x in it
// presumably keeps x from being optimized away — confirm.
func BenchmarkAnd32Parallel(b *testing.B) {
180+
var x [128]uint32 // give x its own cache line
181+
sink = &x
182+
b.RunParallel(func(pb *testing.PB) {
183+
i := uint32(0) // per-goroutine mask counter
184+
for pb.Next() {
185+
atomic.And32(&x[63], i)
186+
i++
187+
}
188+
})
189+
}
190+
191+
// BenchmarkAnd64 times b.N back-to-back atomic.And64 calls from a single
// goroutine. Every call targets x[63] and uses the loop counter as the mask;
// the returned old value is discarded.
// NOTE(review): sink is declared outside this excerpt; storing &x in it
// presumably keeps x from being optimized away — confirm.
func BenchmarkAnd64(b *testing.B) {
192+
var x [128]uint64 // give x its own cache line
193+
sink = &x
194+
for i := 0; i < b.N; i++ {
195+
atomic.And64(&x[63], uint64(i))
196+
}
197+
}
198+
199+
// BenchmarkAnd64Parallel times atomic.And64 under contention: every
// goroutine started by b.RunParallel hits the same word, x[63], using its own
// incrementing counter as the mask.
// NOTE(review): sink is declared outside this excerpt; storing &x in it
// presumably keeps x from being optimized away — confirm.
func BenchmarkAnd64Parallel(b *testing.B) {
200+
var x [128]uint64 // give x its own cache line
201+
sink = &x
202+
b.RunParallel(func(pb *testing.PB) {
203+
i := uint64(0) // per-goroutine mask counter
204+
for pb.Next() {
205+
atomic.And64(&x[63], i)
206+
i++
207+
}
208+
})
209+
}
210+
211+
// BenchmarkOr32 times b.N back-to-back atomic.Or32 calls from a single
// goroutine. Every call targets x[63] and uses the loop counter as the mask;
// the returned old value is discarded.
// NOTE(review): sink is declared outside this excerpt; storing &x in it
// presumably keeps x from being optimized away — confirm.
func BenchmarkOr32(b *testing.B) {
212+
var x [128]uint32 // give x its own cache line
213+
sink = &x
214+
for i := 0; i < b.N; i++ {
215+
atomic.Or32(&x[63], uint32(i))
216+
}
217+
}
218+
219+
// BenchmarkOr32Parallel times atomic.Or32 under contention: every goroutine
// started by b.RunParallel hits the same word, x[63], using its own
// incrementing counter as the mask.
// NOTE(review): sink is declared outside this excerpt; storing &x in it
// presumably keeps x from being optimized away — confirm.
func BenchmarkOr32Parallel(b *testing.B) {
220+
var x [128]uint32 // give x its own cache line
221+
sink = &x
222+
b.RunParallel(func(pb *testing.PB) {
223+
i := uint32(0) // per-goroutine mask counter
224+
for pb.Next() {
225+
atomic.Or32(&x[63], i)
226+
i++
227+
}
228+
})
229+
}
230+
231+
// BenchmarkOr64 times b.N back-to-back atomic.Or64 calls from a single
// goroutine. Every call targets x[63] and uses the loop counter as the mask;
// the returned old value is discarded.
// NOTE(review): sink is declared outside this excerpt; storing &x in it
// presumably keeps x from being optimized away — confirm.
func BenchmarkOr64(b *testing.B) {
232+
var x [128]uint64 // give x its own cache line
233+
sink = &x
234+
for i := 0; i < b.N; i++ {
235+
atomic.Or64(&x[63], uint64(i))
236+
}
237+
}
238+
239+
// BenchmarkOr64Parallel times atomic.Or64 under contention: every goroutine
// started by b.RunParallel hits the same word, x[63], using its own
// incrementing counter as the mask.
// NOTE(review): sink is declared outside this excerpt; storing &x in it
// presumably keeps x from being optimized away — confirm.
func BenchmarkOr64Parallel(b *testing.B) {
240+
var x [128]uint64 // give x its own cache line
241+
sink = &x
242+
b.RunParallel(func(pb *testing.PB) {
243+
i := uint64(0) // per-goroutine mask counter
244+
for pb.Next() {
245+
atomic.Or64(&x[63], i)
246+
i++
247+
}
248+
})
249+
}

0 commit comments

Comments
 (0)