Skip to content

Commit b8c198a

Browse files
committed
LoongArch: Remove redundant barrier instructions before LL-SC loops
This is isomorphic to the LLVM changes [1-2]. On LoongArch, the LL and SC instructions have memory barrier semantics: - LL: <memory-barrier> + <load-exclusive> - SC: <store-conditional> + <memory-barrier> But the compare and swap operation is allowed to fail, and if it fails the SC instruction is not executed, thus the guarantee of acquire semantics cannot be ensured. Therefore, an acquire barrier needs to be generated when failure_memorder includes an acquire operation. On CPUs implementing LoongArch v1.10 or later, "dbar 0b10100" is an acquire barrier; on CPUs implementing LoongArch v1.00, it is a full barrier. So it's always enough for acquire semantics. OTOH, if an acquire semantic is not needed, we still need the "dbar 0x700" as the load-load barrier like all LL-SC loops. [1]:llvm/llvm-project#67391 [2]:llvm/llvm-project#69339 gcc/ChangeLog: * config/loongarch/loongarch.cc (loongarch_memmodel_needs_release_fence): Remove. (loongarch_cas_failure_memorder_needs_acquire): New static function. (loongarch_print_operand): Redefine 'G' for the barrier on CAS failure. * config/loongarch/sync.md (atomic_cas_value_strong<mode>): Remove the redundant barrier before the LL instruction, and emit an acquire barrier on failure if needed by failure_memorder. (atomic_cas_value_cmp_and_7_<mode>): Likewise. (atomic_cas_value_add_7_<mode>): Remove the unnecessary barrier before the LL instruction. (atomic_cas_value_sub_7_<mode>): Likewise. (atomic_cas_value_and_7_<mode>): Likewise. (atomic_cas_value_xor_7_<mode>): Likewise. (atomic_cas_value_or_7_<mode>): Likewise. (atomic_cas_value_nand_7_<mode>): Likewise. (atomic_cas_value_exchange_7_<mode>): Likewise. gcc/testsuite/ChangeLog: * gcc.target/loongarch/cas-acquire.c: New test. (cherry picked from commit 4d86dc5)
1 parent b76c465 commit b8c198a

File tree

3 files changed

+119
-42
lines changed

3 files changed

+119
-42
lines changed

gcc/config/loongarch/loongarch.cc

Lines changed: 17 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -4321,27 +4321,27 @@ loongarch_memmodel_needs_rel_acq_fence (enum memmodel model)
43214321
}
43224322
}
43234323

4324-
/* Return true if a FENCE should be emitted to before a memory access to
4325-
implement the release portion of memory model MODEL. */
4324+
/* Return true if a FENCE should be emitted after a failed CAS to
4325+
implement the acquire semantic of failure_memorder. */
43264326

43274327
static bool
4328-
loongarch_memmodel_needs_release_fence (enum memmodel model)
4328+
loongarch_cas_failure_memorder_needs_acquire (enum memmodel model)
43294329
{
4330-
switch (model)
4330+
switch (memmodel_base (model))
43314331
{
4332+
case MEMMODEL_ACQUIRE:
43324333
case MEMMODEL_ACQ_REL:
43334334
case MEMMODEL_SEQ_CST:
4334-
case MEMMODEL_SYNC_SEQ_CST:
4335-
case MEMMODEL_RELEASE:
4336-
case MEMMODEL_SYNC_RELEASE:
43374335
return true;
43384336

4339-
case MEMMODEL_ACQUIRE:
4340-
case MEMMODEL_CONSUME:
4341-
case MEMMODEL_SYNC_ACQUIRE:
43424337
case MEMMODEL_RELAXED:
4338+
case MEMMODEL_RELEASE:
43434339
return false;
43444340

4341+
/* MEMMODEL_CONSUME is deliberately not handled because it's always
4342+
replaced by MEMMODEL_ACQUIRE as of now. If you see an ICE caused by
4343+
MEMMODEL_CONSUME, read the change (re)introducing it carefully and
4344+
decide what to do. See PR 59448 and get_memmodel in builtins.cc. */
43454345
default:
43464346
gcc_unreachable ();
43474347
}
@@ -4368,7 +4368,8 @@ loongarch_memmodel_needs_release_fence (enum memmodel model)
43684368
'V' Print exact log2 of CONST_INT OP element 0 of a replicated
43694369
CONST_VECTOR in decimal.
43704370
'A' Print a _DB suffix if the memory model requires a release.
4371-
'G' Print a DBAR insn if the memory model requires a release.
4371+
'G' Print a DBAR insn for CAS failure (with an acquire semantic if
4372+
needed, otherwise a simple load-load barrier).
43724373
'i' Print i if the operand is not a register. */
43734374

43744375
static void
@@ -4489,8 +4490,11 @@ loongarch_print_operand (FILE *file, rtx op, int letter)
44894490
break;
44904491

44914492
case 'G':
4492-
if (loongarch_memmodel_needs_release_fence ((enum memmodel) INTVAL (op)))
4493-
fputs ("dbar\t0", file);
4493+
if (loongarch_cas_failure_memorder_needs_acquire (
4494+
memmodel_from_int (INTVAL (op))))
4495+
fputs ("dbar\t0b10100", file);
4496+
else
4497+
fputs ("dbar\t0x700", file);
44944498
break;
44954499

44964500
case 'i':

gcc/config/loongarch/sync.md

Lines changed: 20 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -129,19 +129,18 @@
129129
(clobber (match_scratch:GPR 6 "=&r"))]
130130
""
131131
{
132-
return "%G5\\n\\t"
133-
"1:\\n\\t"
132+
return "1:\\n\\t"
134133
"ll.<amo>\\t%0,%1\\n\\t"
135134
"bne\\t%0,%z2,2f\\n\\t"
136135
"or%i3\\t%6,$zero,%3\\n\\t"
137136
"sc.<amo>\\t%6,%1\\n\\t"
138-
"beq\\t$zero,%6,1b\\n\\t"
137+
"beqz\\t%6,1b\\n\\t"
139138
"b\\t3f\\n\\t"
140139
"2:\\n\\t"
141-
"dbar\\t0x700\\n\\t"
140+
"%G5\\n\\t"
142141
"3:\\n\\t";
143142
}
144-
[(set (attr "length") (const_int 32))])
143+
[(set (attr "length") (const_int 28))])
145144

146145
(define_expand "atomic_compare_and_swap<mode>"
147146
[(match_operand:SI 0 "register_operand" "") ;; bool output
@@ -234,8 +233,7 @@
234233
(clobber (match_scratch:GPR 7 "=&r"))]
235234
""
236235
{
237-
return "%G6\\n\\t"
238-
"1:\\n\\t"
236+
return "1:\\n\\t"
239237
"ll.<amo>\\t%0,%1\\n\\t"
240238
"and\\t%7,%0,%2\\n\\t"
241239
"bne\\t%7,%z4,2f\\n\\t"
@@ -245,10 +243,10 @@
245243
"beq\\t$zero,%7,1b\\n\\t"
246244
"b\\t3f\\n\\t"
247245
"2:\\n\\t"
248-
"dbar\\t0x700\\n\\t"
246+
"%G6\\n\\t"
249247
"3:\\n\\t";
250248
}
251-
[(set (attr "length") (const_int 40))])
249+
[(set (attr "length") (const_int 36))])
252250

253251
(define_expand "atomic_compare_and_swap<mode>"
254252
[(match_operand:SI 0 "register_operand" "") ;; bool output
@@ -303,8 +301,7 @@
303301
(clobber (match_scratch:GPR 8 "=&r"))]
304302
""
305303
{
306-
return "%G6\\n\\t"
307-
"1:\\n\\t"
304+
return "1:\\n\\t"
308305
"ll.<amo>\\t%0,%1\\n\\t"
309306
"and\\t%7,%0,%3\\n\\t"
310307
"add.w\\t%8,%0,%z5\\n\\t"
@@ -314,7 +311,7 @@
314311
"beq\\t$zero,%7,1b";
315312
}
316313

317-
[(set (attr "length") (const_int 32))])
314+
[(set (attr "length") (const_int 28))])
318315

319316
(define_insn "atomic_cas_value_sub_7_<mode>"
320317
[(set (match_operand:GPR 0 "register_operand" "=&r") ;; res
@@ -330,8 +327,7 @@
330327
(clobber (match_scratch:GPR 8 "=&r"))]
331328
""
332329
{
333-
return "%G6\\n\\t"
334-
"1:\\n\\t"
330+
return "1:\\n\\t"
335331
"ll.<amo>\\t%0,%1\\n\\t"
336332
"and\\t%7,%0,%3\\n\\t"
337333
"sub.w\\t%8,%0,%z5\\n\\t"
@@ -340,7 +336,7 @@
340336
"sc.<amo>\\t%7,%1\\n\\t"
341337
"beq\\t$zero,%7,1b";
342338
}
343-
[(set (attr "length") (const_int 32))])
339+
[(set (attr "length") (const_int 28))])
344340

345341
(define_insn "atomic_cas_value_and_7_<mode>"
346342
[(set (match_operand:GPR 0 "register_operand" "=&r") ;; res
@@ -356,8 +352,7 @@
356352
(clobber (match_scratch:GPR 8 "=&r"))]
357353
""
358354
{
359-
return "%G6\\n\\t"
360-
"1:\\n\\t"
355+
return "1:\\n\\t"
361356
"ll.<amo>\\t%0,%1\\n\\t"
362357
"and\\t%7,%0,%3\\n\\t"
363358
"and\\t%8,%0,%z5\\n\\t"
@@ -366,7 +361,7 @@
366361
"sc.<amo>\\t%7,%1\\n\\t"
367362
"beq\\t$zero,%7,1b";
368363
}
369-
[(set (attr "length") (const_int 32))])
364+
[(set (attr "length") (const_int 28))])
370365

371366
(define_insn "atomic_cas_value_xor_7_<mode>"
372367
[(set (match_operand:GPR 0 "register_operand" "=&r") ;; res
@@ -382,8 +377,7 @@
382377
(clobber (match_scratch:GPR 8 "=&r"))]
383378
""
384379
{
385-
return "%G6\\n\\t"
386-
"1:\\n\\t"
380+
return "1:\\n\\t"
387381
"ll.<amo>\\t%0,%1\\n\\t"
388382
"and\\t%7,%0,%3\\n\\t"
389383
"xor\\t%8,%0,%z5\\n\\t"
@@ -393,7 +387,7 @@
393387
"beq\\t$zero,%7,1b";
394388
}
395389

396-
[(set (attr "length") (const_int 32))])
390+
[(set (attr "length") (const_int 28))])
397391

398392
(define_insn "atomic_cas_value_or_7_<mode>"
399393
[(set (match_operand:GPR 0 "register_operand" "=&r") ;; res
@@ -409,8 +403,7 @@
409403
(clobber (match_scratch:GPR 8 "=&r"))]
410404
""
411405
{
412-
return "%G6\\n\\t"
413-
"1:\\n\\t"
406+
return "1:\\n\\t"
414407
"ll.<amo>\\t%0,%1\\n\\t"
415408
"and\\t%7,%0,%3\\n\\t"
416409
"or\\t%8,%0,%z5\\n\\t"
@@ -420,7 +413,7 @@
420413
"beq\\t$zero,%7,1b";
421414
}
422415

423-
[(set (attr "length") (const_int 32))])
416+
[(set (attr "length") (const_int 28))])
424417

425418
(define_insn "atomic_cas_value_nand_7_<mode>"
426419
[(set (match_operand:GPR 0 "register_operand" "=&r") ;; res
@@ -436,8 +429,7 @@
436429
(clobber (match_scratch:GPR 8 "=&r"))]
437430
""
438431
{
439-
return "%G6\\n\\t"
440-
"1:\\n\\t"
432+
return "1:\\n\\t"
441433
"ll.<amo>\\t%0,%1\\n\\t"
442434
"and\\t%7,%0,%3\\n\\t"
443435
"and\\t%8,%0,%z5\\n\\t"
@@ -446,7 +438,7 @@
446438
"sc.<amo>\\t%7,%1\\n\\t"
447439
"beq\\t$zero,%7,1b";
448440
}
449-
[(set (attr "length") (const_int 32))])
441+
[(set (attr "length") (const_int 28))])
450442

451443
(define_insn "atomic_cas_value_exchange_7_<mode>"
452444
[(set (match_operand:GPR 0 "register_operand" "=&r")
@@ -461,8 +453,7 @@
461453
(clobber (match_scratch:GPR 7 "=&r"))]
462454
""
463455
{
464-
return "%G6\\n\\t"
465-
"1:\\n\\t"
456+
return "1:\\n\\t"
466457
"ll.<amo>\\t%0,%1\\n\\t"
467458
"and\\t%7,%0,%z3\\n\\t"
468459
"or%i5\\t%7,%7,%5\\n\\t"
Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
/* { dg-do run } */
2+
/* { dg-require-effective-target c99_runtime } */
3+
/* { dg-require-effective-target pthread } */
4+
/* { dg-options "-std=c99 -pthread" } */
5+
6+
/* https://github.com/llvm/llvm-project/pull/67391#issuecomment-1752403934
7+
reported that this had failed with GCC and 3A6000. */
8+
9+
#include <pthread.h>
10+
#include <stdatomic.h>
11+
#include <stdbool.h>
12+
#include <stdio.h>
13+
14+
static unsigned int tags[32];
15+
static unsigned int vals[32];
16+
17+
static void *
18+
writer_entry (void *data)
19+
{
20+
atomic_uint *pt = (atomic_uint *)tags;
21+
atomic_uint *pv = (atomic_uint *)vals;
22+
23+
for (unsigned int n = 1; n < 10000; n++)
24+
{
25+
atomic_store_explicit (&pv[n & 31], n, memory_order_release);
26+
atomic_store_explicit (&pt[n & 31], n, memory_order_release);
27+
}
28+
29+
return NULL;
30+
}
31+
32+
static void *
33+
reader_entry (void *data)
34+
{
35+
atomic_uint *pt = (atomic_uint *)tags;
36+
atomic_uint *pv = (atomic_uint *)vals;
37+
int i;
38+
39+
for (;;)
40+
{
41+
for (i = 0; i < 32; i++)
42+
{
43+
unsigned int tag = 0;
44+
bool res;
45+
46+
res = atomic_compare_exchange_weak_explicit (
47+
&pt[i], &tag, 0, memory_order_acquire, memory_order_acquire);
48+
if (!res)
49+
{
50+
unsigned int val;
51+
52+
val = atomic_load_explicit (&pv[i], memory_order_relaxed);
53+
if (val < tag)
54+
__builtin_trap ();
55+
}
56+
}
57+
}
58+
59+
return NULL;
60+
}
61+
62+
int
63+
main (int argc, char *argv[])
64+
{
65+
pthread_t writer;
66+
pthread_t reader;
67+
int res;
68+
69+
res = pthread_create (&writer, NULL, writer_entry, NULL);
70+
if (res < 0)
71+
__builtin_trap ();
72+
73+
res = pthread_create (&reader, NULL, reader_entry, NULL);
74+
if (res < 0)
75+
__builtin_trap ();
76+
77+
res = pthread_join (writer, NULL);
78+
if (res < 0)
79+
__builtin_trap ();
80+
81+
return 0;
82+
}

0 commit comments

Comments
 (0)