Commit 7119a0f

[AtomicExpandPass] Match isIdempotentRMW with InstcombineRMW (#142277)
Add umin, smin, umax, smax to isIdempotentRMW
1 parent: 00eb22f, commit: 7119a0f
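Background (a summary, not part of the commit message): an atomicrmw is considered idempotent when its constant operand can never change the stored value, so only the loaded result and the memory ordering remain meaningful. InstCombine already recognizes the min/max forms (hence "Match ... with InstcombineRMW"); this patch teaches AtomicExpand's isIdempotentRMW the same cases: min with the signed maximum, max with the signed minimum, umin with all-ones (the unsigned maximum), and umax with zero. A minimal standalone C++ check of those identities, purely illustrative:

// identity_check.cpp: illustrative only, not part of the LLVM change.
// Verifies the no-op identities the new Min/Max/UMin/UMax cases rely on.
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <limits>

int main() {
  const int32_t  smax = std::numeric_limits<int32_t>::max();   // constant for atomicrmw min
  const int32_t  smin = std::numeric_limits<int32_t>::min();   // constant for atomicrmw max
  const uint32_t umax = std::numeric_limits<uint32_t>::max();  // constant for atomicrmw umin (i32 -1)
  for (int32_t x : {smin, -7, 0, 42, smax}) {
    assert(std::min(x, smax) == x);            // min  with INT_MAX  never changes x
    assert(std::max(x, smin) == x);            // max  with INT_MIN  never changes x
    const uint32_t u = static_cast<uint32_t>(x);
    assert(std::min(u, umax) == u);            // umin with UINT_MAX never changes u
    assert(std::max(u, uint32_t{0}) == u);     // umax with 0        never changes u
  }
  return 0;
}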

File tree

2 files changed: +339 -3 lines changed

llvm/lib/CodeGen/AtomicExpandPass.cpp

Lines changed: 10 additions & 3 deletions
@@ -1570,20 +1570,27 @@ bool AtomicExpandImpl::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
 }
 
 bool AtomicExpandImpl::isIdempotentRMW(AtomicRMWInst *RMWI) {
+  // TODO: Add floating point support.
   auto C = dyn_cast<ConstantInt>(RMWI->getValOperand());
   if (!C)
     return false;
 
-  AtomicRMWInst::BinOp Op = RMWI->getOperation();
-  switch (Op) {
+  switch (RMWI->getOperation()) {
   case AtomicRMWInst::Add:
   case AtomicRMWInst::Sub:
   case AtomicRMWInst::Or:
   case AtomicRMWInst::Xor:
     return C->isZero();
   case AtomicRMWInst::And:
     return C->isMinusOne();
-  // FIXME: we could also treat Min/Max/UMin/UMax by the INT_MIN/INT_MAX/...
+  case AtomicRMWInst::Min:
+    return C->isMaxValue(true);
+  case AtomicRMWInst::Max:
+    return C->isMinValue(true);
+  case AtomicRMWInst::UMin:
+    return C->isMaxValue(false);
+  case AtomicRMWInst::UMax:
+    return C->isMinValue(false);
   default:
     return false;
   }

llvm/test/CodeGen/X86/atomic-idempotent.ll

Lines changed: 329 additions & 0 deletions
@@ -622,4 +622,333 @@ define void @or8_nouse_seq_cst(ptr %p) #0 {
   ret void
 }
 
+define void @atomic_umin_uint_max(ptr %addr) #0 {
+; X64-LABEL: atomic_umin_uint_max:
+; X64: # %bb.0:
+; X64-NEXT: lock orl $0, -{{[0-9]+}}(%rsp)
+; X64-NEXT: movl (%rdi), %eax
+; X64-NEXT: retq
+;
+; X86-SSE2-LABEL: atomic_umin_uint_max:
+; X86-SSE2: # %bb.0:
+; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-SSE2-NEXT: mfence
+; X86-SSE2-NEXT: movl (%eax), %eax
+; X86-SSE2-NEXT: retl
+;
+; X86-SLM-LABEL: atomic_umin_uint_max:
+; X86-SLM: # %bb.0:
+; X86-SLM-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-SLM-NEXT: lock orl $0, (%esp)
+; X86-SLM-NEXT: movl (%eax), %eax
+; X86-SLM-NEXT: retl
+;
+; X86-ATOM-LABEL: atomic_umin_uint_max:
+; X86-ATOM: # %bb.0:
+; X86-ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-ATOM-NEXT: lock orl $0, (%esp)
+; X86-ATOM-NEXT: movl (%eax), %eax
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: retl
+  atomicrmw umin ptr %addr, i32 -1 seq_cst
+  ret void
+}
+
+define void @atomic_umax_zero(ptr %addr) #0 {
+; X64-LABEL: atomic_umax_zero:
+; X64: # %bb.0:
+; X64-NEXT: lock orl $0, -{{[0-9]+}}(%rsp)
+; X64-NEXT: movl (%rdi), %eax
+; X64-NEXT: retq
+;
+; X86-SSE2-LABEL: atomic_umax_zero:
+; X86-SSE2: # %bb.0:
+; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-SSE2-NEXT: mfence
+; X86-SSE2-NEXT: movl (%eax), %eax
+; X86-SSE2-NEXT: retl
+;
+; X86-SLM-LABEL: atomic_umax_zero:
+; X86-SLM: # %bb.0:
+; X86-SLM-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-SLM-NEXT: lock orl $0, (%esp)
+; X86-SLM-NEXT: movl (%eax), %eax
+; X86-SLM-NEXT: retl
+;
+; X86-ATOM-LABEL: atomic_umax_zero:
+; X86-ATOM: # %bb.0:
+; X86-ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-ATOM-NEXT: lock orl $0, (%esp)
+; X86-ATOM-NEXT: movl (%eax), %eax
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: retl
+  atomicrmw umax ptr %addr, i32 0 seq_cst
+  ret void
+}
+
+define void @atomic_min_smax_char(ptr %addr) #0 {
+; X64-LABEL: atomic_min_smax_char:
+; X64: # %bb.0:
+; X64-NEXT: lock orl $0, -{{[0-9]+}}(%rsp)
+; X64-NEXT: movzbl (%rdi), %eax
+; X64-NEXT: retq
+;
+; X86-SSE2-LABEL: atomic_min_smax_char:
+; X86-SSE2: # %bb.0:
+; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-SSE2-NEXT: mfence
+; X86-SSE2-NEXT: movzbl (%eax), %eax
+; X86-SSE2-NEXT: retl
+;
+; X86-SLM-LABEL: atomic_min_smax_char:
+; X86-SLM: # %bb.0:
+; X86-SLM-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-SLM-NEXT: lock orl $0, (%esp)
+; X86-SLM-NEXT: movzbl (%eax), %eax
+; X86-SLM-NEXT: retl
+;
+; X86-ATOM-LABEL: atomic_min_smax_char:
+; X86-ATOM: # %bb.0:
+; X86-ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-ATOM-NEXT: lock orl $0, (%esp)
+; X86-ATOM-NEXT: movzbl (%eax), %eax
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: retl
+  atomicrmw min ptr %addr, i8 127 seq_cst
+  ret void
+}
+
+define void @atomic_max_smin_char(ptr %addr) #0 {
+; X64-LABEL: atomic_max_smin_char:
+; X64: # %bb.0:
+; X64-NEXT: lock orl $0, -{{[0-9]+}}(%rsp)
+; X64-NEXT: movzbl (%rdi), %eax
+; X64-NEXT: retq
+;
+; X86-SSE2-LABEL: atomic_max_smin_char:
+; X86-SSE2: # %bb.0:
+; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-SSE2-NEXT: mfence
+; X86-SSE2-NEXT: movzbl (%eax), %eax
+; X86-SSE2-NEXT: retl
+;
+; X86-SLM-LABEL: atomic_max_smin_char:
+; X86-SLM: # %bb.0:
+; X86-SLM-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-SLM-NEXT: lock orl $0, (%esp)
+; X86-SLM-NEXT: movzbl (%eax), %eax
+; X86-SLM-NEXT: retl
+;
+; X86-ATOM-LABEL: atomic_max_smin_char:
+; X86-ATOM: # %bb.0:
+; X86-ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-ATOM-NEXT: lock orl $0, (%esp)
+; X86-ATOM-NEXT: movzbl (%eax), %eax
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: retl
+  atomicrmw max ptr %addr, i8 -128 seq_cst
+  ret void
+}
+
+define void @atomic_min_umax_char(ptr %addr) #0 {
+; X64-LABEL: atomic_min_umax_char:
+; X64: # %bb.0:
+; X64-NEXT: lock orl $0, -{{[0-9]+}}(%rsp)
+; X64-NEXT: movzbl (%rdi), %eax
+; X64-NEXT: retq
+;
+; X86-SSE2-LABEL: atomic_min_umax_char:
+; X86-SSE2: # %bb.0:
+; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-SSE2-NEXT: mfence
+; X86-SSE2-NEXT: movzbl (%eax), %eax
+; X86-SSE2-NEXT: retl
+;
+; X86-SLM-LABEL: atomic_min_umax_char:
+; X86-SLM: # %bb.0:
+; X86-SLM-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-SLM-NEXT: lock orl $0, (%esp)
+; X86-SLM-NEXT: movzbl (%eax), %eax
+; X86-SLM-NEXT: retl
+;
+; X86-ATOM-LABEL: atomic_min_umax_char:
+; X86-ATOM: # %bb.0:
+; X86-ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-ATOM-NEXT: lock orl $0, (%esp)
+; X86-ATOM-NEXT: movzbl (%eax), %eax
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: retl
+  atomicrmw umin ptr %addr, i8 255 seq_cst
+  ret void
+}
+
+define void @atomic_max_umin_char(ptr %addr) #0 {
+; X64-LABEL: atomic_max_umin_char:
+; X64: # %bb.0:
+; X64-NEXT: lock orl $0, -{{[0-9]+}}(%rsp)
+; X64-NEXT: movzbl (%rdi), %eax
+; X64-NEXT: retq
+;
+; X86-SSE2-LABEL: atomic_max_umin_char:
+; X86-SSE2: # %bb.0:
+; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-SSE2-NEXT: mfence
+; X86-SSE2-NEXT: movzbl (%eax), %eax
+; X86-SSE2-NEXT: retl
+;
+; X86-SLM-LABEL: atomic_max_umin_char:
+; X86-SLM: # %bb.0:
+; X86-SLM-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-SLM-NEXT: lock orl $0, (%esp)
+; X86-SLM-NEXT: movzbl (%eax), %eax
+; X86-SLM-NEXT: retl
+;
+; X86-ATOM-LABEL: atomic_max_umin_char:
+; X86-ATOM: # %bb.0:
+; X86-ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-ATOM-NEXT: lock orl $0, (%esp)
+; X86-ATOM-NEXT: movzbl (%eax), %eax
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: retl
+  atomicrmw umax ptr %addr, i8 0 seq_cst
+  ret void
+}
+
+; TODO: Add floating point support.
+define void @atomic_fadd_zero(ptr %addr) #0 {
+; X64-LABEL: atomic_fadd_zero:
+; X64: # %bb.0:
+; X64-NEXT: movl (%rdi), %eax
+; X64-NEXT: .p2align 4
+; X64-NEXT: .LBB21_1: # %atomicrmw.start
+; X64-NEXT: # =>This Inner Loop Header: Depth=1
+; X64-NEXT: lock cmpxchgl %eax, (%rdi)
+; X64-NEXT: jne .LBB21_1
+; X64-NEXT: # %bb.2: # %atomicrmw.end
+; X64-NEXT: retq
+;
+; X86-SSE2-LABEL: atomic_fadd_zero:
+; X86-SSE2: # %bb.0:
+; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-SSE2-NEXT: .p2align 4
+; X86-SSE2-NEXT: .LBB21_1: # %atomicrmw.start
+; X86-SSE2-NEXT: # =>This Inner Loop Header: Depth=1
+; X86-SSE2-NEXT: movd %xmm0, %eax
+; X86-SSE2-NEXT: lock cmpxchgl %eax, (%ecx)
+; X86-SSE2-NEXT: movd %eax, %xmm0
+; X86-SSE2-NEXT: jne .LBB21_1
+; X86-SSE2-NEXT: # %bb.2: # %atomicrmw.end
+; X86-SSE2-NEXT: retl
+;
+; X86-SLM-LABEL: atomic_fadd_zero:
+; X86-SLM: # %bb.0:
+; X86-SLM-NEXT: subl $8, %esp
+; X86-SLM-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-SLM-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-SLM-NEXT: .p2align 4
+; X86-SLM-NEXT: .LBB21_1: # %atomicrmw.start
+; X86-SLM-NEXT: # =>This Inner Loop Header: Depth=1
+; X86-SLM-NEXT: movss %xmm0, (%esp)
+; X86-SLM-NEXT: movl (%esp), %eax
+; X86-SLM-NEXT: lock cmpxchgl %eax, (%ecx)
+; X86-SLM-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-SLM-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-SLM-NEXT: jne .LBB21_1
+; X86-SLM-NEXT: # %bb.2: # %atomicrmw.end
+; X86-SLM-NEXT: addl $8, %esp
+; X86-SLM-NEXT: retl
+;
+; X86-ATOM-LABEL: atomic_fadd_zero:
+; X86-ATOM: # %bb.0:
+; X86-ATOM-NEXT: leal -{{[0-9]+}}(%esp), %esp
+; X86-ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-ATOM-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-ATOM-NEXT: .p2align 4
+; X86-ATOM-NEXT: .LBB21_1: # %atomicrmw.start
+; X86-ATOM-NEXT: # =>This Inner Loop Header: Depth=1
+; X86-ATOM-NEXT: movss %xmm0, (%esp)
+; X86-ATOM-NEXT: movl (%esp), %eax
+; X86-ATOM-NEXT: lock cmpxchgl %eax, (%ecx)
+; X86-ATOM-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-ATOM-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-ATOM-NEXT: jne .LBB21_1
+; X86-ATOM-NEXT: # %bb.2: # %atomicrmw.end
+; X86-ATOM-NEXT: leal {{[0-9]+}}(%esp), %esp
+; X86-ATOM-NEXT: retl
+  atomicrmw fadd ptr %addr, float -0.0 monotonic
+  ret void
+}
+
+define void @atomic_fsub_zero(ptr %addr) #0 {
+; X64-LABEL: atomic_fsub_zero:
+; X64: # %bb.0:
+; X64-NEXT: movl (%rdi), %eax
+; X64-NEXT: .p2align 4
+; X64-NEXT: .LBB22_1: # %atomicrmw.start
+; X64-NEXT: # =>This Inner Loop Header: Depth=1
+; X64-NEXT: lock cmpxchgl %eax, (%rdi)
+; X64-NEXT: jne .LBB22_1
+; X64-NEXT: # %bb.2: # %atomicrmw.end
+; X64-NEXT: retq
+;
+; X86-SSE2-LABEL: atomic_fsub_zero:
+; X86-SSE2: # %bb.0:
+; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-SSE2-NEXT: .p2align 4
+; X86-SSE2-NEXT: .LBB22_1: # %atomicrmw.start
+; X86-SSE2-NEXT: # =>This Inner Loop Header: Depth=1
+; X86-SSE2-NEXT: movd %xmm0, %eax
+; X86-SSE2-NEXT: lock cmpxchgl %eax, (%ecx)
+; X86-SSE2-NEXT: movd %eax, %xmm0
+; X86-SSE2-NEXT: jne .LBB22_1
+; X86-SSE2-NEXT: # %bb.2: # %atomicrmw.end
+; X86-SSE2-NEXT: retl
+;
+; X86-SLM-LABEL: atomic_fsub_zero:
+; X86-SLM: # %bb.0:
+; X86-SLM-NEXT: subl $8, %esp
+; X86-SLM-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-SLM-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-SLM-NEXT: .p2align 4
+; X86-SLM-NEXT: .LBB22_1: # %atomicrmw.start
+; X86-SLM-NEXT: # =>This Inner Loop Header: Depth=1
+; X86-SLM-NEXT: movss %xmm0, (%esp)
+; X86-SLM-NEXT: movl (%esp), %eax
+; X86-SLM-NEXT: lock cmpxchgl %eax, (%ecx)
+; X86-SLM-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-SLM-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-SLM-NEXT: jne .LBB22_1
+; X86-SLM-NEXT: # %bb.2: # %atomicrmw.end
+; X86-SLM-NEXT: addl $8, %esp
+; X86-SLM-NEXT: retl
+;
+; X86-ATOM-LABEL: atomic_fsub_zero:
+; X86-ATOM: # %bb.0:
+; X86-ATOM-NEXT: leal -{{[0-9]+}}(%esp), %esp
+; X86-ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-ATOM-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-ATOM-NEXT: .p2align 4
+; X86-ATOM-NEXT: .LBB22_1: # %atomicrmw.start
+; X86-ATOM-NEXT: # =>This Inner Loop Header: Depth=1
+; X86-ATOM-NEXT: movss %xmm0, (%esp)
+; X86-ATOM-NEXT: movl (%esp), %eax
+; X86-ATOM-NEXT: lock cmpxchgl %eax, (%ecx)
+; X86-ATOM-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-ATOM-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-ATOM-NEXT: jne .LBB22_1
+; X86-ATOM-NEXT: # %bb.2: # %atomicrmw.end
+; X86-ATOM-NEXT: leal {{[0-9]+}}(%esp), %esp
+; X86-ATOM-NEXT: retl
+  atomicrmw fsub ptr %addr, float 0.0 release
+  ret void
+}
+
 attributes #0 = { nounwind }
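The two floating-point tests document the status quo called out by the TODO: isIdempotentRMW only matches ConstantInt operands, so fadd with -0.0 and fsub with +0.0 still expand to a cmpxchg loop even though, for every non-NaN value, adding -0.0 or subtracting +0.0 returns the value unchanged. NaN, whose result never compares equal to the input, is one reason the FP cases need extra care. A small standalone check of those identities, illustrative only:

// fp_identity_check.cpp: illustrative only, not part of the LLVM change.
#include <cassert>
#include <cmath>
#include <limits>

int main() {
  const float inf = std::numeric_limits<float>::infinity();
  const float vals[] = {0.0f, -0.0f, 1.5f, -2.25f, inf, -inf};
  for (float x : vals) {
    assert(x + (-0.0f) == x);  // fadd with -0.0 leaves every non-NaN value unchanged
    assert(x - 0.0f == x);     // fsub with +0.0 likewise
  }
  // NaN is the awkward case: the sum is still NaN, but it never compares equal.
  const float nan = std::numeric_limits<float>::quiet_NaN();
  assert(std::isnan(nan + (-0.0f)) && (nan + (-0.0f)) != nan);
  return 0;
}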
