
Commit 592e45f

Browse files
committed
[X86] Add test cases to show missed opportunity to use RMW for atomic_load+sub+atomic_store.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@338929 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent c2be4a7 commit 592e45f
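
The commit message refers to a read-modify-write (RMW) lowering. Below is a minimal sketch of the pattern the new tests exercise, using a hypothetical function name; the folded form at the end illustrates the missed opportunity and is not output the tests currently check for:

define void @sub_32r_sketch(i32* %p, i32 %v) {
; What the backend emits today on x86-64 (matching the CHECK lines added below):
;   movl (%rdi), %eax
;   subl %esi, %eax
;   movl %eax, (%rdi)
; The single memory-destination RMW instruction it could fold to instead:
;   subl %esi, (%rdi)
  %1 = load atomic i32, i32* %p acquire, align 4
  %2 = sub i32 %1, %v
  store atomic i32 %2, i32* %p monotonic, align 4
  ret void
}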

File tree

1 file changed (+212, -30 lines)

test/CodeGen/X86/atomic_mi.ll

Lines changed: 212 additions & 30 deletions
@@ -457,6 +457,188 @@ define void @add_32r_seq_cst(i32* %p, i32 %v) {
   ret void
 }
 
+; ----- SUB -----
+
+define void @sub_8r(i8* %p, i8 %v) {
+; X64-LABEL: sub_8r:
+; X64: # %bb.0:
+; X64-NEXT: movb (%rdi), %al
+; X64-NEXT: subb %sil, %al
+; X64-NEXT: movb %al, (%rdi)
+; X64-NEXT: retq
+;
+; X32-LABEL: sub_8r:
+; X32: # %bb.0:
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movb (%eax), %cl
+; X32-NEXT: subb {{[0-9]+}}(%esp), %cl
+; X32-NEXT: movb %cl, (%eax)
+; X32-NEXT: retl
+  %1 = load atomic i8, i8* %p seq_cst, align 1
+  %2 = sub i8 %1, %v
+  store atomic i8 %2, i8* %p release, align 1
+  ret void
+}
+
+define void @sub_16r(i16* %p, i16 %v) {
+; Currently the transformation is not done on 16 bit accesses, as the backend
+; treats 16 bit arithmetic as expensive on X86/X86_64.
+; X64-LABEL: sub_16r:
+; X64: # %bb.0:
+; X64-NEXT: movzwl (%rdi), %eax
+; X64-NEXT: subw %si, %ax
+; X64-NEXT: movw %ax, (%rdi)
+; X64-NEXT: retq
+;
+; X32-LABEL: sub_16r:
+; X32: # %bb.0:
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movzwl (%eax), %ecx
+; X32-NEXT: subw {{[0-9]+}}(%esp), %cx
+; X32-NEXT: movw %cx, (%eax)
+; X32-NEXT: retl
+  %1 = load atomic i16, i16* %p acquire, align 2
+  %2 = sub i16 %1, %v
+  store atomic i16 %2, i16* %p release, align 2
+  ret void
+}
+
+define void @sub_32r(i32* %p, i32 %v) {
+; X64-LABEL: sub_32r:
+; X64: # %bb.0:
+; X64-NEXT: movl (%rdi), %eax
+; X64-NEXT: subl %esi, %eax
+; X64-NEXT: movl %eax, (%rdi)
+; X64-NEXT: retq
+;
+; X32-LABEL: sub_32r:
+; X32: # %bb.0:
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movl (%eax), %ecx
+; X32-NEXT: subl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: movl %ecx, (%eax)
+; X32-NEXT: retl
+  %1 = load atomic i32, i32* %p acquire, align 4
+  %2 = sub i32 %1, %v
+  store atomic i32 %2, i32* %p monotonic, align 4
+  ret void
+}
+
+; The following is a corner case where the load is subtracted from itself. The
+; pattern matching should not fold this. We only test with 32-bit sub, but the
+; same applies to other sizes and operations.
+define void @sub_32r_self(i32* %p) {
+; X64-LABEL: sub_32r_self:
+; X64: # %bb.0:
+; X64-NEXT: movl (%rdi), %eax
+; X64-NEXT: movl $0, (%rdi)
+; X64-NEXT: retq
+;
+; X32-LABEL: sub_32r_self:
+; X32: # %bb.0:
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movl (%eax), %ecx
+; X32-NEXT: movl $0, (%eax)
+; X32-NEXT: retl
+  %1 = load atomic i32, i32* %p acquire, align 4
+  %2 = sub i32 %1, %1
+  store atomic i32 %2, i32* %p monotonic, align 4
+  ret void
+}
+
+; The following is a corner case where the load's result is returned. The
+; optimizer isn't allowed to duplicate the load because it's atomic.
+define i32 @sub_32r_ret_load(i32* %p, i32 %v) {
+; X64-LABEL: sub_32r_ret_load:
+; X64: # %bb.0:
+; X64-NEXT: movl (%rdi), %eax
+; X64-NEXT: movl %eax, %ecx
+; X64-NEXT: subl %esi, %ecx
+; X64-NEXT: movl %ecx, (%rdi)
+; X64-NEXT: retq
+;
+; X32-LABEL: sub_32r_ret_load:
+; X32: # %bb.0:
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: movl (%ecx), %eax
+; X32-NEXT: movl %eax, %edx
+; X32-NEXT: subl {{[0-9]+}}(%esp), %edx
+; X32-NEXT: movl %edx, (%ecx)
+; X32-NEXT: retl
+; More code here, we just don't want it to load from P.
+  %1 = load atomic i32, i32* %p acquire, align 4
+  %2 = sub i32 %1, %v
+  store atomic i32 %2, i32* %p monotonic, align 4
+  ret i32 %1
+}
+
+define void @sub_64r(i64* %p, i64 %v) {
+; X64-LABEL: sub_64r:
+; X64: # %bb.0:
+; X64-NEXT: movq (%rdi), %rax
+; X64-NEXT: subq %rsi, %rax
+; X64-NEXT: movq %rax, (%rdi)
+; X64-NEXT: retq
+;
+; X32-LABEL: sub_64r:
+; X32: # %bb.0:
+; X32-NEXT: pushl %ebx
+; X32-NEXT: .cfi_def_cfa_offset 8
+; X32-NEXT: pushl %esi
+; X32-NEXT: .cfi_def_cfa_offset 12
+; X32-NEXT: .cfi_offset %esi, -12
+; X32-NEXT: .cfi_offset %ebx, -8
+; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X32-NEXT: xorl %eax, %eax
+; X32-NEXT: xorl %edx, %edx
+; X32-NEXT: xorl %ecx, %ecx
+; X32-NEXT: xorl %ebx, %ebx
+; X32-NEXT: lock cmpxchg8b (%esi)
+; X32-NEXT: movl %edx, %ecx
+; X32-NEXT: movl %eax, %ebx
+; X32-NEXT: subl {{[0-9]+}}(%esp), %ebx
+; X32-NEXT: sbbl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: movl (%esi), %eax
+; X32-NEXT: movl 4(%esi), %edx
+; X32-NEXT: .p2align 4, 0x90
+; X32-NEXT: .LBB23_1: # %atomicrmw.start
+; X32-NEXT: # =>This Inner Loop Header: Depth=1
+; X32-NEXT: lock cmpxchg8b (%esi)
+; X32-NEXT: jne .LBB23_1
+; X32-NEXT: # %bb.2: # %atomicrmw.end
+; X32-NEXT: popl %esi
+; X32-NEXT: .cfi_def_cfa_offset 8
+; X32-NEXT: popl %ebx
+; X32-NEXT: .cfi_def_cfa_offset 4
+; X32-NEXT: retl
+; We do not check X86-32 as it cannot do 'subq'.
+  %1 = load atomic i64, i64* %p acquire, align 8
+  %2 = sub i64 %1, %v
+  store atomic i64 %2, i64* %p release, align 8
+  ret void
+}
+
+define void @sub_32r_seq_cst(i32* %p, i32 %v) {
+; X64-LABEL: sub_32r_seq_cst:
+; X64: # %bb.0:
+; X64-NEXT: movl (%rdi), %eax
+; X64-NEXT: subl %esi, %eax
+; X64-NEXT: xchgl %eax, (%rdi)
+; X64-NEXT: retq
+;
+; X32-LABEL: sub_32r_seq_cst:
+; X32: # %bb.0:
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movl (%eax), %ecx
+; X32-NEXT: subl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: xchgl %ecx, (%eax)
+; X32-NEXT: retl
+  %1 = load atomic i32, i32* %p monotonic, align 4
+  %2 = sub i32 %1, %v
+  store atomic i32 %2, i32* %p seq_cst, align 4
+  ret void
+}
+
 ; ----- AND -----
 
 define void @and_8i(i8* %p) {
@@ -593,11 +775,11 @@ define void @and_64i(i64* %p) {
 ; X32-NEXT: movl (%esi), %eax
 ; X32-NEXT: movl 4(%esi), %edx
 ; X32-NEXT: .p2align 4, 0x90
-; X32-NEXT: .LBB24_1: # %atomicrmw.start
+; X32-NEXT: .LBB31_1: # %atomicrmw.start
 ; X32-NEXT: # =>This Inner Loop Header: Depth=1
 ; X32-NEXT: xorl %ecx, %ecx
 ; X32-NEXT: lock cmpxchg8b (%esi)
-; X32-NEXT: jne .LBB24_1
+; X32-NEXT: jne .LBB31_1
 ; X32-NEXT: # %bb.2: # %atomicrmw.end
 ; X32-NEXT: popl %esi
 ; X32-NEXT: .cfi_def_cfa_offset 8
@@ -638,10 +820,10 @@ define void @and_64r(i64* %p, i64 %v) {
 ; X32-NEXT: movl (%esi), %eax
 ; X32-NEXT: movl 4(%esi), %edx
 ; X32-NEXT: .p2align 4, 0x90
-; X32-NEXT: .LBB25_1: # %atomicrmw.start
+; X32-NEXT: .LBB32_1: # %atomicrmw.start
 ; X32-NEXT: # =>This Inner Loop Header: Depth=1
 ; X32-NEXT: lock cmpxchg8b (%esi)
-; X32-NEXT: jne .LBB25_1
+; X32-NEXT: jne .LBB32_1
 ; X32-NEXT: # %bb.2: # %atomicrmw.end
 ; X32-NEXT: popl %esi
 ; X32-NEXT: .cfi_def_cfa_offset 8
@@ -830,10 +1012,10 @@ define void @or_64i(i64* %p) {
 ; X32-NEXT: movl (%esi), %eax
 ; X32-NEXT: movl 4(%esi), %edx
 ; X32-NEXT: .p2align 4, 0x90
-; X32-NEXT: .LBB34_1: # %atomicrmw.start
+; X32-NEXT: .LBB41_1: # %atomicrmw.start
 ; X32-NEXT: # =>This Inner Loop Header: Depth=1
 ; X32-NEXT: lock cmpxchg8b (%esi)
-; X32-NEXT: jne .LBB34_1
+; X32-NEXT: jne .LBB41_1
 ; X32-NEXT: # %bb.2: # %atomicrmw.end
 ; X32-NEXT: popl %esi
 ; X32-NEXT: .cfi_def_cfa_offset 8
@@ -874,10 +1056,10 @@ define void @or_64r(i64* %p, i64 %v) {
 ; X32-NEXT: movl (%esi), %eax
 ; X32-NEXT: movl 4(%esi), %edx
 ; X32-NEXT: .p2align 4, 0x90
-; X32-NEXT: .LBB35_1: # %atomicrmw.start
+; X32-NEXT: .LBB42_1: # %atomicrmw.start
 ; X32-NEXT: # =>This Inner Loop Header: Depth=1
 ; X32-NEXT: lock cmpxchg8b (%esi)
-; X32-NEXT: jne .LBB35_1
+; X32-NEXT: jne .LBB42_1
 ; X32-NEXT: # %bb.2: # %atomicrmw.end
 ; X32-NEXT: popl %esi
 ; X32-NEXT: .cfi_def_cfa_offset 8
@@ -1066,10 +1248,10 @@ define void @xor_64i(i64* %p) {
 ; X32-NEXT: movl (%esi), %eax
 ; X32-NEXT: movl 4(%esi), %edx
 ; X32-NEXT: .p2align 4, 0x90
-; X32-NEXT: .LBB44_1: # %atomicrmw.start
+; X32-NEXT: .LBB51_1: # %atomicrmw.start
 ; X32-NEXT: # =>This Inner Loop Header: Depth=1
 ; X32-NEXT: lock cmpxchg8b (%esi)
-; X32-NEXT: jne .LBB44_1
+; X32-NEXT: jne .LBB51_1
 ; X32-NEXT: # %bb.2: # %atomicrmw.end
 ; X32-NEXT: popl %esi
 ; X32-NEXT: .cfi_def_cfa_offset 8
@@ -1110,10 +1292,10 @@ define void @xor_64r(i64* %p, i64 %v) {
 ; X32-NEXT: movl (%esi), %eax
 ; X32-NEXT: movl 4(%esi), %edx
 ; X32-NEXT: .p2align 4, 0x90
-; X32-NEXT: .LBB45_1: # %atomicrmw.start
+; X32-NEXT: .LBB52_1: # %atomicrmw.start
 ; X32-NEXT: # =>This Inner Loop Header: Depth=1
 ; X32-NEXT: lock cmpxchg8b (%esi)
-; X32-NEXT: jne .LBB45_1
+; X32-NEXT: jne .LBB52_1
 ; X32-NEXT: # %bb.2: # %atomicrmw.end
 ; X32-NEXT: popl %esi
 ; X32-NEXT: .cfi_def_cfa_offset 8
@@ -1266,10 +1448,10 @@ define void @inc_64(i64* %p) {
 ; X32-NEXT: movl (%esi), %eax
 ; X32-NEXT: movl 4(%esi), %edx
 ; X32-NEXT: .p2align 4, 0x90
-; X32-NEXT: .LBB51_1: # %atomicrmw.start
+; X32-NEXT: .LBB58_1: # %atomicrmw.start
 ; X32-NEXT: # =>This Inner Loop Header: Depth=1
 ; X32-NEXT: lock cmpxchg8b (%esi)
-; X32-NEXT: jne .LBB51_1
+; X32-NEXT: jne .LBB58_1
 ; X32-NEXT: # %bb.2: # %atomicrmw.end
 ; X32-NEXT: popl %esi
 ; X32-NEXT: .cfi_def_cfa_offset 8
@@ -1413,10 +1595,10 @@ define void @dec_64(i64* %p) {
 ; X32-NEXT: movl (%esi), %eax
 ; X32-NEXT: movl 4(%esi), %edx
 ; X32-NEXT: .p2align 4, 0x90
-; X32-NEXT: .LBB56_1: # %atomicrmw.start
+; X32-NEXT: .LBB63_1: # %atomicrmw.start
 ; X32-NEXT: # =>This Inner Loop Header: Depth=1
 ; X32-NEXT: lock cmpxchg8b (%esi)
-; X32-NEXT: jne .LBB56_1
+; X32-NEXT: jne .LBB63_1
 ; X32-NEXT: # %bb.2: # %atomicrmw.end
 ; X32-NEXT: popl %esi
 ; X32-NEXT: .cfi_def_cfa_offset 8
@@ -1545,10 +1727,10 @@ define void @not_64(i64* %p) {
 ; X32-NEXT: movl (%esi), %eax
 ; X32-NEXT: movl 4(%esi), %edx
 ; X32-NEXT: .p2align 4, 0x90
-; X32-NEXT: .LBB61_1: # %atomicrmw.start
+; X32-NEXT: .LBB68_1: # %atomicrmw.start
 ; X32-NEXT: # =>This Inner Loop Header: Depth=1
 ; X32-NEXT: lock cmpxchg8b (%esi)
-; X32-NEXT: jne .LBB61_1
+; X32-NEXT: jne .LBB68_1
 ; X32-NEXT: # %bb.2: # %atomicrmw.end
 ; X32-NEXT: popl %esi
 ; X32-NEXT: .cfi_def_cfa_offset 8
@@ -1672,11 +1854,11 @@ define void @neg_64(i64* %p) {
 ; X32-NEXT: movl (%edi), %eax
 ; X32-NEXT: movl 4(%edi), %edx
 ; X32-NEXT: .p2align 4, 0x90
-; X32-NEXT: .LBB66_1: # %atomicrmw.start
+; X32-NEXT: .LBB73_1: # %atomicrmw.start
 ; X32-NEXT: # =>This Inner Loop Header: Depth=1
 ; X32-NEXT: movl %esi, %ecx
 ; X32-NEXT: lock cmpxchg8b (%edi)
-; X32-NEXT: jne .LBB66_1
+; X32-NEXT: jne .LBB73_1
 ; X32-NEXT: # %bb.2: # %atomicrmw.end
 ; X32-NEXT: popl %esi
 ; X32-NEXT: .cfi_def_cfa_offset 12
@@ -1784,10 +1966,10 @@ define void @fadd_64r(double* %loc, double %val) {
 ; X32-NEXT: movl (%esi), %eax
 ; X32-NEXT: movl 4(%esi), %edx
 ; X32-NEXT: .p2align 4, 0x90
-; X32-NEXT: .LBB69_1: # %atomicrmw.start
+; X32-NEXT: .LBB76_1: # %atomicrmw.start
 ; X32-NEXT: # =>This Inner Loop Header: Depth=1
 ; X32-NEXT: lock cmpxchg8b (%esi)
-; X32-NEXT: jne .LBB69_1
+; X32-NEXT: jne .LBB76_1
 ; X32-NEXT: # %bb.2: # %atomicrmw.end
 ; X32-NEXT: leal -8(%ebp), %esp
 ; X32-NEXT: popl %esi
@@ -1874,10 +2056,10 @@ define void @fadd_64g() {
 ; X32-NEXT: movl glob64+4, %edx
 ; X32-NEXT: movl glob64, %eax
 ; X32-NEXT: .p2align 4, 0x90
-; X32-NEXT: .LBB71_1: # %atomicrmw.start
+; X32-NEXT: .LBB78_1: # %atomicrmw.start
 ; X32-NEXT: # =>This Inner Loop Header: Depth=1
 ; X32-NEXT: lock cmpxchg8b glob64
-; X32-NEXT: jne .LBB71_1
+; X32-NEXT: jne .LBB78_1
 ; X32-NEXT: # %bb.2: # %atomicrmw.end
 ; X32-NEXT: leal -4(%ebp), %esp
 ; X32-NEXT: popl %ebx
@@ -1961,10 +2143,10 @@ define void @fadd_64imm() {
 ; X32-NEXT: movl -559038737, %eax
 ; X32-NEXT: movl -559038733, %edx
 ; X32-NEXT: .p2align 4, 0x90
-; X32-NEXT: .LBB73_1: # %atomicrmw.start
+; X32-NEXT: .LBB80_1: # %atomicrmw.start
 ; X32-NEXT: # =>This Inner Loop Header: Depth=1
 ; X32-NEXT: lock cmpxchg8b -559038737
-; X32-NEXT: jne .LBB73_1
+; X32-NEXT: jne .LBB80_1
 ; X32-NEXT: # %bb.2: # %atomicrmw.end
 ; X32-NEXT: leal -4(%ebp), %esp
 ; X32-NEXT: popl %ebx
@@ -2048,10 +2230,10 @@ define void @fadd_64stack() {
 ; X32-NEXT: movl (%esp), %eax
 ; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
 ; X32-NEXT: .p2align 4, 0x90
-; X32-NEXT: .LBB75_1: # %atomicrmw.start
+; X32-NEXT: .LBB82_1: # %atomicrmw.start
 ; X32-NEXT: # =>This Inner Loop Header: Depth=1
 ; X32-NEXT: lock cmpxchg8b (%esp)
-; X32-NEXT: jne .LBB75_1
+; X32-NEXT: jne .LBB82_1
 ; X32-NEXT: # %bb.2: # %atomicrmw.end
 ; X32-NEXT: leal -4(%ebp), %esp
 ; X32-NEXT: popl %ebx
@@ -2108,10 +2290,10 @@ define void @fadd_array(i64* %arg, double %arg1, i64 %arg2) {
 ; X32-NEXT: movl (%edi,%esi,8), %eax
 ; X32-NEXT: movl 4(%edi,%esi,8), %edx
 ; X32-NEXT: .p2align 4, 0x90
-; X32-NEXT: .LBB76_1: # %atomicrmw.start
+; X32-NEXT: .LBB83_1: # %atomicrmw.start
 ; X32-NEXT: # =>This Inner Loop Header: Depth=1
 ; X32-NEXT: lock cmpxchg8b (%edi,%esi,8)
-; X32-NEXT: jne .LBB76_1
+; X32-NEXT: jne .LBB83_1
 ; X32-NEXT: # %bb.2: # %atomicrmw.end
 ; X32-NEXT: leal -12(%ebp), %esp
 ; X32-NEXT: popl %esi
