
Commit 618b7fb

paulwalker-arm authored and GeorgeARM committed
[NFC][InstCombine][AArch64] Add simplify tests for reversed SVE intrinsics.
Add missing tests for fdivr, fsubr, sdivr, subr & udivr. Add test case to demonstrate incorrect poison propagation.
1 parent 8884ecf commit 618b7fb
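
For context: the "reversed" SVE binary intrinsics perform the same operation as their plain counterparts but with the two data operands swapped, so for active lanes subr computes op2 - op1 where sub computes op1 - op2 (likewise fdivr, fsubr, sdivr and udivr). A minimal sketch of the pattern under test, with a hypothetical function name and assuming the usual merging semantics where inactive lanes take the value of the first data operand:

; For lanes where %pg is set, %r is %b - %a; inactive lanes keep %a.
define <vscale x 4 x i32> @subr_sketch(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
  %r = call <vscale x 4 x i32> @llvm.aarch64.sve.subr.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %r
}

As the CHECK lines in the new tests show, these reversed forms are currently left unsimplified even with all-constant operands; the tests pin down that baseline before any folds are added.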

File tree

1 file changed: +99, -0 lines


llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-simplify-binop.ll

Lines changed: 99 additions & 0 deletions
@@ -103,6 +103,16 @@ define <vscale x 4 x i32> @constant_mul_u_after_striping_inactive_lanes(<vscale
   ret <vscale x 4 x i32> %3
 }

+; SVE intrinsics don't have the same poison propagation rules as the IR.
+define <vscale x 4 x i32> @dont_propagate_poison(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @dont_propagate_poison(
+; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]], <vscale x 4 x i32> [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    ret <vscale x 4 x i32> poison
+;
+  %r = call <vscale x 4 x i32> @llvm.aarch64.sve.and.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> poison, <vscale x 4 x i32> splat (i32 1))
+  ret <vscale x 4 x i32> %r
+}
+
 ; The following tests demonstrate the operations for which hooks are in place to
 ; enable simplification. Given the simplifications themselves are common code, it
 ; is assumed they are already well tested elsewhere.
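
A note on the test above: for plain IR instructions the fold recorded by the CHECK lines would be sound, because IR's poison rules let a poison operand poison the result lane-wise. The commit message flags the same fold as incorrect for the target intrinsic, whose semantics are not governed by those rules. A minimal plain-IR contrast, with a hypothetical function name, where folding to poison is legitimate:

define <vscale x 4 x i32> @ir_and_propagates_poison() {
  ; A plain 'and' with a poison operand yields poison, so this function
  ; may validly be folded to 'ret <vscale x 4 x i32> poison'.
  %r = and <vscale x 4 x i32> poison, splat (i32 1)
  ret <vscale x 4 x i32> %r
}
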
@@ -202,6 +212,16 @@ define <vscale x 4 x float> @constant_fdiv_u(<vscale x 4 x i1> %pg) #0 {
   ret <vscale x 4 x float> %r
 }

+define <vscale x 4 x float> @constant_fdivr(<vscale x 4 x i1> %pg) #0 {
+; CHECK-LABEL: define <vscale x 4 x float> @constant_fdivr(
+; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[R:%.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.fdivr.nxv4f32(<vscale x 4 x i1> [[PG]], <vscale x 4 x float> splat (float 6.000000e+00), <vscale x 4 x float> splat (float 1.200000e+01))
+; CHECK-NEXT:    ret <vscale x 4 x float> [[R]]
+;
+  %r = call <vscale x 4 x float> @llvm.aarch64.sve.fdivr.nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> splat (float 6.0), <vscale x 4 x float> splat (float 12.0))
+  ret <vscale x 4 x float> %r
+}
+
 define <vscale x 4 x float> @constant_fmul(<vscale x 4 x i1> %pg) #0 {
 ; CHECK-LABEL: define <vscale x 4 x float> @constant_fmul(
 ; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
@@ -240,6 +260,16 @@ define <vscale x 4 x float> @constant_fsub_u(<vscale x 4 x i1> %pg) #0 {
   ret <vscale x 4 x float> %r
 }

+define <vscale x 4 x float> @constant_fsubr(<vscale x 4 x i1> %pg) #0 {
+; CHECK-LABEL: define <vscale x 4 x float> @constant_fsubr(
+; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[R:%.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.fsubr.nxv4f32(<vscale x 4 x i1> [[PG]], <vscale x 4 x float> splat (float 6.000000e+00), <vscale x 4 x float> splat (float 7.000000e+00))
+; CHECK-NEXT:    ret <vscale x 4 x float> [[R]]
+;
+  %r = call <vscale x 4 x float> @llvm.aarch64.sve.fsubr.nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> splat (float 6.0), <vscale x 4 x float> splat (float 7.0))
+  ret <vscale x 4 x float> %r
+}
+
 define <vscale x 4 x i32> @constant_mul(<vscale x 4 x i1> %pg) #0 {
 ; CHECK-LABEL: define <vscale x 4 x i32> @constant_mul(
 ; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
@@ -341,6 +371,39 @@ define <vscale x 4 x i32> @constant_sdiv_u_with_overflow(<vscale x 4 x i1> %pg)
   %r = call <vscale x 4 x i32> @llvm.aarch64.sve.sdiv.u.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> splat (i32 -2147483648), <vscale x 4 x i32> splat (i32 -1))
   ret <vscale x 4 x i32> %r
 }
+
+define <vscale x 4 x i32> @constant_sdivr(<vscale x 4 x i1> %pg) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @constant_sdivr(
+; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[R:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.sdivr.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> splat (i32 3), <vscale x 4 x i32> splat (i32 -7))
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[R]]
+;
+  %r = call <vscale x 4 x i32> @llvm.aarch64.sve.sdivr.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> splat (i32 3), <vscale x 4 x i32> splat (i32 -7))
+  ret <vscale x 4 x i32> %r
+}
+
+; The intrinsic's IR equivalent does not support divide-by-zero.
+define <vscale x 4 x i32> @constant_sdivr_by_zero(<vscale x 4 x i1> %pg) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @constant_sdivr_by_zero(
+; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[R:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.sdivr.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32> splat (i32 -7))
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[R]]
+;
+  %r = call <vscale x 4 x i32> @llvm.aarch64.sve.sdivr.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> splat (i32 0), <vscale x 4 x i32> splat (i32 -7))
+  ret <vscale x 4 x i32> %r
+}
+
+; The intrinsic's IR equivalent does not support overflow.
+define <vscale x 4 x i32> @constant_sdivr_with_overflow(<vscale x 4 x i1> %pg) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @constant_sdivr_with_overflow(
+; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[R:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.sdivr.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> splat (i32 -1), <vscale x 4 x i32> splat (i32 -2147483648))
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[R]]
+;
+  %r = call <vscale x 4 x i32> @llvm.aarch64.sve.sdivr.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> splat (i32 -1), <vscale x 4 x i32> splat (i32 -2147483648))
+  ret <vscale x 4 x i32> %r
+}
+
 define <vscale x 4 x i32> @constant_sub(<vscale x 4 x i1> %pg) #0 {
 ; CHECK-LABEL: define <vscale x 4 x i32> @constant_sub(
 ; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
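
For reference, the scalar semantics behind the two negative tests above: for IR's sdiv, division by zero and the -2147483648 / -1 case are both immediate undefined behaviour per the LangRef, so there is no defined constant the simplifier could substitute and the intrinsic call must be left in place. A sketch of the corresponding scalar cases, with a hypothetical function name (executing either division is UB, which is exactly the point):

define i32 @sdiv_ub_cases() {
  %by_zero  = sdiv i32 -7, 0            ; division by zero: immediate UB
  %overflow = sdiv i32 -2147483648, -1  ; quotient unrepresentable in i32: immediate UB
  %r = add i32 %by_zero, %overflow
  ret i32 %r
}
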
@@ -360,6 +423,16 @@ define <vscale x 4 x i32> @constant_sub_u(<vscale x 4 x i1> %pg) #0 {
   ret <vscale x 4 x i32> %r
 }

+define <vscale x 4 x i32> @constant_subr(<vscale x 4 x i1> %pg) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @constant_subr(
+; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[R:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.subr.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> splat (i32 7), <vscale x 4 x i32> splat (i32 3))
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[R]]
+;
+  %r = call <vscale x 4 x i32> @llvm.aarch64.sve.subr.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> splat (i32 7), <vscale x 4 x i32> splat (i32 3))
+  ret <vscale x 4 x i32> %r
+}
+
 define <vscale x 4 x i32> @constant_udiv(<vscale x 4 x i1> %pg) #0 {
 ; CHECK-LABEL: define <vscale x 4 x i32> @constant_udiv(
 ; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
@@ -402,6 +475,27 @@ define <vscale x 4 x i32> @constant_udiv_u_by_zero(<vscale x 4 x i1> %pg) #0 {
   ret <vscale x 4 x i32> %r
 }

+define <vscale x 4 x i32> @constant_udivr(<vscale x 4 x i1> %pg) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @constant_udivr(
+; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[R:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.udivr.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> splat (i32 3), <vscale x 4 x i32> splat (i32 7))
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[R]]
+;
+  %r = call <vscale x 4 x i32> @llvm.aarch64.sve.udivr.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> splat (i32 3), <vscale x 4 x i32> splat (i32 7))
+  ret <vscale x 4 x i32> %r
+}
+
+; The intrinsic's IR equivalent does not support divide-by-zero.
+define <vscale x 4 x i32> @constant_udivr_by_zero(<vscale x 4 x i1> %pg) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @constant_udivr_by_zero(
+; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[R:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.udivr.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32> splat (i32 7))
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[R]]
+;
+  %r = call <vscale x 4 x i32> @llvm.aarch64.sve.udivr.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> splat (i32 0), <vscale x 4 x i32> splat (i32 7))
+  ret <vscale x 4 x i32> %r
+}
+
 declare <vscale x 4 x i32> @llvm.aarch64.sve.dup.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, i32)

 declare <vscale x 4 x i32> @llvm.aarch64.sve.add.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
@@ -410,8 +504,11 @@ declare <vscale x 4 x i32> @llvm.aarch64.sve.eor.nxv4i32(<vscale x 4 x i1>, <vsc
 declare <vscale x 4 x i32> @llvm.aarch64.sve.mul.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
 declare <vscale x 4 x i32> @llvm.aarch64.sve.orr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
 declare <vscale x 4 x i32> @llvm.aarch64.sve.sdiv.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.sdivr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
 declare <vscale x 4 x i32> @llvm.aarch64.sve.sub.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.subr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
 declare <vscale x 4 x i32> @llvm.aarch64.sve.udiv.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.udivr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)

 declare <vscale x 4 x i32> @llvm.aarch64.sve.add.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
 declare <vscale x 4 x i32> @llvm.aarch64.sve.and.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
@@ -424,8 +521,10 @@ declare <vscale x 4 x i32> @llvm.aarch64.sve.udiv.u.nxv4i32(<vscale x 4 x i1>, <

 declare <vscale x 4 x float> @llvm.aarch64.sve.fadd.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
 declare <vscale x 4 x float> @llvm.aarch64.sve.fdiv.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
+declare <vscale x 4 x float> @llvm.aarch64.sve.fdivr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
 declare <vscale x 4 x float> @llvm.aarch64.sve.fmul.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
 declare <vscale x 4 x float> @llvm.aarch64.sve.fsub.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
+declare <vscale x 4 x float> @llvm.aarch64.sve.fsubr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)

 declare <vscale x 4 x float> @llvm.aarch64.sve.fadd.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
 declare <vscale x 4 x float> @llvm.aarch64.sve.fdiv.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
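
To reproduce the checks locally: tests in this style are generated by update_test_checks.py and run through opt plus FileCheck. The file's RUN line falls outside the diff context, but for this kind of test it is typically of the form

; RUN: opt -S -passes=instcombine < %s | FileCheck %s

and after editing the tests the CHECK lines can be regenerated with something like the following, where the opt path is an assumption about the local build directory:

llvm/utils/update_test_checks.py --opt-binary=build/bin/opt llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-simplify-binop.ll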
