
Commit 618b7fb

paulwalker-arm authored and GeorgeARM committed
[NFC][InstCombine][AArch64] Add simplify tests for reversed SVE intrinsics.
Add missing tests for fdivr, fsubr, sdivr, subr & udivr. Add test case to demonstrate incorrect poison propagation.
1 parent 8884ecf commit 618b7fb
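
For context: the "reversed" SVE binary intrinsics perform the same operation as their plain counterparts but with the two data operands swapped, so for active lanes subr computes op2 - op1 where sub computes op1 - op2 (likewise fdivr, fsubr, sdivr and udivr). A minimal sketch of the pattern under test, with a hypothetical function name and assuming the usual merging semantics where inactive lanes take the value of the first data operand:

; For lanes where %pg is set, %r is %b - %a; inactive lanes keep %a.
define <vscale x 4 x i32> @subr_sketch(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
  %r = call <vscale x 4 x i32> @llvm.aarch64.sve.subr.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %r
}

As the CHECK lines in the new tests show, these reversed forms are currently left unsimplified even with all-constant operands; the tests pin down that baseline before any folds are added.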

File tree

1 file changed: +99, -0 lines


llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-simplify-binop.ll

Lines changed: 99 additions & 0 deletions
@@ -103,6 +103,16 @@ define <vscale x 4 x i32> @constant_mul_u_after_striping_inactive_lanes(<vscale
   ret <vscale x 4 x i32> %3
 }

+; SVE intrinsics don't have the same poison propagation rules as the IR.
+define <vscale x 4 x i32> @dont_propagate_poison(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @dont_propagate_poison(
+; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]], <vscale x 4 x i32> [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    ret <vscale x 4 x i32> poison
+;
+  %r = call <vscale x 4 x i32> @llvm.aarch64.sve.and.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> poison, <vscale x 4 x i32> splat (i32 1))
+  ret <vscale x 4 x i32> %r
+}
+
 ; The following tests demonstrate the operations for which hooks are in place to
 ; enable simplification. Given the simplifications themselves are common code, it
 ; is assumed they are already well tested elsewhere.
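
A note on the test above: for plain IR instructions the fold recorded by the CHECK lines would be sound, because IR's poison rules let a poison operand poison the result lane-wise. The commit message flags the same fold as incorrect for the target intrinsic, whose semantics are not governed by those rules. A minimal plain-IR contrast, with a hypothetical function name, where folding to poison is legitimate:

define <vscale x 4 x i32> @ir_and_propagates_poison() {
  ; A plain 'and' with a poison operand yields poison, so this function
  ; may validly be folded to 'ret <vscale x 4 x i32> poison'.
  %r = and <vscale x 4 x i32> poison, splat (i32 1)
  ret <vscale x 4 x i32> %r
}
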
@@ -202,6 +212,16 @@ define <vscale x 4 x float> @constant_fdiv_u(<vscale x 4 x i1> %pg) #0 {
   ret <vscale x 4 x float> %r
 }

+define <vscale x 4 x float> @constant_fdivr(<vscale x 4 x i1> %pg) #0 {
+; CHECK-LABEL: define <vscale x 4 x float> @constant_fdivr(
+; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[R:%.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.fdivr.nxv4f32(<vscale x 4 x i1> [[PG]], <vscale x 4 x float> splat (float 6.000000e+00), <vscale x 4 x float> splat (float 1.200000e+01))
+; CHECK-NEXT:    ret <vscale x 4 x float> [[R]]
+;
+  %r = call <vscale x 4 x float> @llvm.aarch64.sve.fdivr.nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> splat (float 6.0), <vscale x 4 x float> splat (float 12.0))
+  ret <vscale x 4 x float> %r
+}
+
 define <vscale x 4 x float> @constant_fmul(<vscale x 4 x i1> %pg) #0 {
 ; CHECK-LABEL: define <vscale x 4 x float> @constant_fmul(
 ; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
@@ -240,6 +260,16 @@ define <vscale x 4 x float> @constant_fsub_u(<vscale x 4 x i1> %pg) #0 {
   ret <vscale x 4 x float> %r
 }

+define <vscale x 4 x float> @constant_fsubr(<vscale x 4 x i1> %pg) #0 {
+; CHECK-LABEL: define <vscale x 4 x float> @constant_fsubr(
+; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[R:%.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.fsubr.nxv4f32(<vscale x 4 x i1> [[PG]], <vscale x 4 x float> splat (float 6.000000e+00), <vscale x 4 x float> splat (float 7.000000e+00))
+; CHECK-NEXT:    ret <vscale x 4 x float> [[R]]
+;
+  %r = call <vscale x 4 x float> @llvm.aarch64.sve.fsubr.nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> splat (float 6.0), <vscale x 4 x float> splat (float 7.0))
+  ret <vscale x 4 x float> %r
+}
+
 define <vscale x 4 x i32> @constant_mul(<vscale x 4 x i1> %pg) #0 {
 ; CHECK-LABEL: define <vscale x 4 x i32> @constant_mul(
 ; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
@@ -341,6 +371,39 @@ define <vscale x 4 x i32> @constant_sdiv_u_with_overflow(<vscale x 4 x i1> %pg)
   %r = call <vscale x 4 x i32> @llvm.aarch64.sve.sdiv.u.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> splat (i32 -2147483648), <vscale x 4 x i32> splat (i32 -1))
   ret <vscale x 4 x i32> %r
 }
+
+define <vscale x 4 x i32> @constant_sdivr(<vscale x 4 x i1> %pg) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @constant_sdivr(
+; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[R:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.sdivr.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> splat (i32 3), <vscale x 4 x i32> splat (i32 -7))
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[R]]
+;
+  %r = call <vscale x 4 x i32> @llvm.aarch64.sve.sdivr.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> splat (i32 3), <vscale x 4 x i32> splat (i32 -7))
+  ret <vscale x 4 x i32> %r
+}
+
+; The intrinsic's IR equivalent does not support divide-by-zero.
+define <vscale x 4 x i32> @constant_sdivr_by_zero(<vscale x 4 x i1> %pg) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @constant_sdivr_by_zero(
+; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[R:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.sdivr.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32> splat (i32 -7))
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[R]]
+;
+  %r = call <vscale x 4 x i32> @llvm.aarch64.sve.sdivr.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> splat (i32 0), <vscale x 4 x i32> splat (i32 -7))
+  ret <vscale x 4 x i32> %r
+}
+
+; The intrinsic's IR equivalent does not support overflow.
+define <vscale x 4 x i32> @constant_sdivr_with_overflow(<vscale x 4 x i1> %pg) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @constant_sdivr_with_overflow(
+; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[R:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.sdivr.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> splat (i32 -1), <vscale x 4 x i32> splat (i32 -2147483648))
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[R]]
+;
+  %r = call <vscale x 4 x i32> @llvm.aarch64.sve.sdivr.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> splat (i32 -1), <vscale x 4 x i32> splat (i32 -2147483648))
+  ret <vscale x 4 x i32> %r
+}
+
 define <vscale x 4 x i32> @constant_sub(<vscale x 4 x i1> %pg) #0 {
 ; CHECK-LABEL: define <vscale x 4 x i32> @constant_sub(
 ; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
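
For reference, the scalar semantics behind the two negative tests above: for IR's sdiv, division by zero and the -2147483648 / -1 case are both immediate undefined behaviour per the LangRef, so there is no defined constant the simplifier could substitute and the intrinsic call must be left in place. A sketch of the corresponding scalar cases, with a hypothetical function name (executing either division is UB, which is exactly the point):

define i32 @sdiv_ub_cases() {
  %by_zero  = sdiv i32 -7, 0            ; division by zero: immediate UB
  %overflow = sdiv i32 -2147483648, -1  ; quotient unrepresentable in i32: immediate UB
  %r = add i32 %by_zero, %overflow
  ret i32 %r
}
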
@@ -360,6 +423,16 @@ define <vscale x 4 x i32> @constant_sub_u(<vscale x 4 x i1> %pg) #0 {
   ret <vscale x 4 x i32> %r
 }

+define <vscale x 4 x i32> @constant_subr(<vscale x 4 x i1> %pg) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @constant_subr(
+; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[R:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.subr.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> splat (i32 7), <vscale x 4 x i32> splat (i32 3))
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[R]]
+;
+  %r = call <vscale x 4 x i32> @llvm.aarch64.sve.subr.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> splat (i32 7), <vscale x 4 x i32> splat (i32 3))
+  ret <vscale x 4 x i32> %r
+}
+
 define <vscale x 4 x i32> @constant_udiv(<vscale x 4 x i1> %pg) #0 {
 ; CHECK-LABEL: define <vscale x 4 x i32> @constant_udiv(
 ; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
@@ -402,6 +475,27 @@ define <vscale x 4 x i32> @constant_udiv_u_by_zero(<vscale x 4 x i1> %pg) #0 {
   ret <vscale x 4 x i32> %r
 }

+define <vscale x 4 x i32> @constant_udivr(<vscale x 4 x i1> %pg) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @constant_udivr(
+; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[R:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.udivr.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> splat (i32 3), <vscale x 4 x i32> splat (i32 7))
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[R]]
+;
+  %r = call <vscale x 4 x i32> @llvm.aarch64.sve.udivr.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> splat (i32 3), <vscale x 4 x i32> splat (i32 7))
+  ret <vscale x 4 x i32> %r
+}
+
+; The intrinsic's IR equivalent does not support divide-by-zero.
+define <vscale x 4 x i32> @constant_udivr_by_zero(<vscale x 4 x i1> %pg) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @constant_udivr_by_zero(
+; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[R:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.udivr.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32> splat (i32 7))
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[R]]
+;
+  %r = call <vscale x 4 x i32> @llvm.aarch64.sve.udivr.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> splat (i32 0), <vscale x 4 x i32> splat (i32 7))
+  ret <vscale x 4 x i32> %r
+}
+
 declare <vscale x 4 x i32> @llvm.aarch64.sve.dup.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, i32)

 declare <vscale x 4 x i32> @llvm.aarch64.sve.add.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
@@ -410,8 +504,11 @@ declare <vscale x 4 x i32> @llvm.aarch64.sve.eor.nxv4i32(<vscale x 4 x i1>, <vsc
 declare <vscale x 4 x i32> @llvm.aarch64.sve.mul.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
 declare <vscale x 4 x i32> @llvm.aarch64.sve.orr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
 declare <vscale x 4 x i32> @llvm.aarch64.sve.sdiv.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.sdivr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
 declare <vscale x 4 x i32> @llvm.aarch64.sve.sub.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.subr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
 declare <vscale x 4 x i32> @llvm.aarch64.sve.udiv.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.udivr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)

 declare <vscale x 4 x i32> @llvm.aarch64.sve.add.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
 declare <vscale x 4 x i32> @llvm.aarch64.sve.and.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
@@ -424,8 +521,10 @@ declare <vscale x 4 x i32> @llvm.aarch64.sve.udiv.u.nxv4i32(<vscale x 4 x i1>, <

 declare <vscale x 4 x float> @llvm.aarch64.sve.fadd.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
 declare <vscale x 4 x float> @llvm.aarch64.sve.fdiv.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
+declare <vscale x 4 x float> @llvm.aarch64.sve.fdivr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
 declare <vscale x 4 x float> @llvm.aarch64.sve.fmul.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
 declare <vscale x 4 x float> @llvm.aarch64.sve.fsub.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
+declare <vscale x 4 x float> @llvm.aarch64.sve.fsubr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)

 declare <vscale x 4 x float> @llvm.aarch64.sve.fadd.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
 declare <vscale x 4 x float> @llvm.aarch64.sve.fdiv.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
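
To reproduce the checks locally: tests in this style are generated by update_test_checks.py and run through opt plus FileCheck. The file's RUN line falls outside the diff context, but for this kind of test it is typically of the form

; RUN: opt -S -passes=instcombine < %s | FileCheck %s

and after editing the tests the CHECK lines can be regenerated with something like the following, where the opt path is an assumption about the local build directory:

llvm/utils/update_test_checks.py --opt-binary=build/bin/opt llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-simplify-binop.ll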
