llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-simplify-binop.ll
99 additions & 0 deletions
@@ -103,6 +103,16 @@ define <vscale x 4 x i32> @constant_mul_u_after_striping_inactive_lanes(<vscale
   ret <vscale x 4 x i32> %3
 }
 
+; SVE intrinsics don't have the same poison propagation rules as the IR.
+define <vscale x 4 x i32> @dont_propagate_poison(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @dont_propagate_poison(
+; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]], <vscale x 4 x i32> [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: ret <vscale x 4 x i32> poison
+;
+  %r = call <vscale x 4 x i32> @llvm.aarch64.sve.and.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> poison, <vscale x 4 x i32> splat (i32 1))
+  ret <vscale x 4 x i32> %r
+}
+
 ; The following tests demonstrate the operations for which hooks are in place to
 ; enable simplification. Given the simplifications themselves are common code, it
 ; is assumed they are already well tested elsewhere.
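For context on the comment above: in plain IR, binary operators such as `and` propagate poison from either operand, so a poison input makes the whole result poison. A minimal illustrative sketch of the plain-IR behaviour (not part of this patch; the function name is hypothetical):

; Hypothetical plain-IR analogue, not in the patch: 'and' with a poison
; operand is itself poison, so InstSimplify folds this to 'ret poison'.
define <vscale x 4 x i32> @ir_and_propagates_poison() {
  %r = and <vscale x 4 x i32> poison, splat (i32 1)
  ret <vscale x 4 x i32> %r
}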
@@ -202,6 +212,16 @@ define <vscale x 4 x float> @constant_fdiv_u(<vscale x 4 x i1> %pg) #0 {
   ret <vscale x 4 x float> %r
 }
 
+define <vscale x 4 x float> @constant_fdivr(<vscale x 4 x i1> %pg) #0 {
+; CHECK-LABEL: define <vscale x 4 x float> @constant_fdivr(
+; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[R:%.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.fdivr.nxv4f32(<vscale x 4 x i1> [[PG]], <vscale x 4 x float> splat (float 6.000000e+00), <vscale x 4 x float> splat (float 1.200000e+01))
+; CHECK-NEXT: ret <vscale x 4 x float> [[R]]
+;
+  %r = call <vscale x 4 x float> @llvm.aarch64.sve.fdivr.nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> splat (float 6.0), <vscale x 4 x float> splat (float 12.0))
+  ret <vscale x 4 x float> %r
+}
+
 define <vscale x 4 x float> @constant_fmul(<vscale x 4 x i1> %pg) #0 {
 ; CHECK-LABEL: define <vscale x 4 x float> @constant_fmul(
 ; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
@@ -240,6 +260,16 @@ define <vscale x 4 x float> @constant_fsub_u(<vscale x 4 x i1> %pg) #0 {
   ret <vscale x 4 x float> %r
 }
 
+define <vscale x 4 x float> @constant_fsubr(<vscale x 4 x i1> %pg) #0 {
+; CHECK-LABEL: define <vscale x 4 x float> @constant_fsubr(
+; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[R:%.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.fsubr.nxv4f32(<vscale x 4 x i1> [[PG]], <vscale x 4 x float> splat (float 6.000000e+00), <vscale x 4 x float> splat (float 7.000000e+00))
+; CHECK-NEXT: ret <vscale x 4 x float> [[R]]
+;
+  %r = call <vscale x 4 x float> @llvm.aarch64.sve.fsubr.nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> splat (float 6.0), <vscale x 4 x float> splat (float 7.0))
+  ret <vscale x 4 x float> %r
+}
+
 define <vscale x 4 x i32> @constant_mul(<vscale x 4 x i1> %pg) #0 {
 ; CHECK-LABEL: define <vscale x 4 x i32> @constant_mul(
 ; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
@@ -341,6 +371,39 @@ define <vscale x 4 x i32> @constant_sdiv_u_with_overflow(<vscale x 4 x i1> %pg)
   %r = call <vscale x 4 x i32> @llvm.aarch64.sve.sdiv.u.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> splat (i32 -2147483648), <vscale x 4 x i32> splat (i32 -1))
   ret <vscale x 4 x i32> %r
 }
+
+define <vscale x 4 x i32> @constant_sdivr(<vscale x 4 x i1> %pg) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @constant_sdivr(
+; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[R:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.sdivr.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> splat (i32 3), <vscale x 4 x i32> splat (i32 -7))
+; CHECK-NEXT: ret <vscale x 4 x i32> [[R]]
+;
+  %r = call <vscale x 4 x i32> @llvm.aarch64.sve.sdivr.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> splat (i32 3), <vscale x 4 x i32> splat (i32 -7))
+  ret <vscale x 4 x i32> %r
+}
+
+; The intrinsic's IR equivalent does not support divide-by-zero.
+define <vscale x 4 x i32> @constant_sdivr_by_zero(<vscale x 4 x i1> %pg) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @constant_sdivr_by_zero(
+; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[R:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.sdivr.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32> splat (i32 -7))
+; CHECK-NEXT: ret <vscale x 4 x i32> [[R]]
+;
+  %r = call <vscale x 4 x i32> @llvm.aarch64.sve.sdivr.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> splat (i32 0), <vscale x 4 x i32> splat (i32 -7))
+  ret <vscale x 4 x i32> %r
+}
+
+; The intrinsic's IR equivalent does not support overflow.
+define <vscale x 4 x i32> @constant_sdivr_with_overflow(<vscale x 4 x i1> %pg) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @constant_sdivr_with_overflow(
+; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[R:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.sdivr.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> splat (i32 -1), <vscale x 4 x i32> splat (i32 -2147483648))
+; CHECK-NEXT: ret <vscale x 4 x i32> [[R]]
+;
+  %r = call <vscale x 4 x i32> @llvm.aarch64.sve.sdivr.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> splat (i32 -1), <vscale x 4 x i32> splat (i32 -2147483648))
+  ret <vscale x 4 x i32> %r
+}
+
 define <vscale x 4 x i32> @constant_sub(<vscale x 4 x i1> %pg) #0 {
 ; CHECK-LABEL: define <vscale x 4 x i32> @constant_sub(
 ; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
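For context on the "divide-by-zero" and "overflow" comments in the hunk above: the plain-IR `sdiv` instruction has immediate undefined behaviour for a zero divisor and for INT_MIN divided by -1, which is why constant folding the intrinsic through the IR operation is blocked for those inputs. A minimal sketch of the plain-IR counterpart (not part of this patch; the function name is hypothetical):

; Hypothetical plain-IR analogue, not in the patch: an 'sdiv' whose divisor is
; zero (and likewise INT_MIN sdiv -1) is immediate undefined behaviour in IR,
; whereas the SVE intrinsic accepts these inputs.
define <vscale x 4 x i32> @ir_sdiv_by_zero_is_ub() {
  %r = sdiv <vscale x 4 x i32> splat (i32 -7), zeroinitializer
  ret <vscale x 4 x i32> %r
}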
@@ -360,6 +423,16 @@ define <vscale x 4 x i32> @constant_sub_u(<vscale x 4 x i1> %pg) #0 {
   ret <vscale x 4 x i32> %r
 }
 
+define <vscale x 4 x i32> @constant_subr(<vscale x 4 x i1> %pg) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @constant_subr(
+; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[R:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.subr.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> splat (i32 7), <vscale x 4 x i32> splat (i32 3))
+; CHECK-NEXT: ret <vscale x 4 x i32> [[R]]
+;
+  %r = call <vscale x 4 x i32> @llvm.aarch64.sve.subr.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> splat (i32 7), <vscale x 4 x i32> splat (i32 3))
+  ret <vscale x 4 x i32> %r
+}
+
 define <vscale x 4 x i32> @constant_udiv(<vscale x 4 x i1> %pg) #0 {
 ; CHECK-LABEL: define <vscale x 4 x i32> @constant_udiv(
 ; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
@@ -402,6 +475,27 @@ define <vscale x 4 x i32> @constant_udiv_u_by_zero(<vscale x 4 x i1> %pg) #0 {
   ret <vscale x 4 x i32> %r
 }
 
+define <vscale x 4 x i32> @constant_udivr(<vscale x 4 x i1> %pg) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @constant_udivr(
+; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[R:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.udivr.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> splat (i32 3), <vscale x 4 x i32> splat (i32 7))
+; CHECK-NEXT: ret <vscale x 4 x i32> [[R]]
+;
+  %r = call <vscale x 4 x i32> @llvm.aarch64.sve.udivr.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> splat (i32 3), <vscale x 4 x i32> splat (i32 7))
+  ret <vscale x 4 x i32> %r
+}
+
+; The intrinsic's IR equivalent does not support divide-by-zero.
+define <vscale x 4 x i32> @constant_udivr_by_zero(<vscale x 4 x i1> %pg) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @constant_udivr_by_zero(
+; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[R:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.udivr.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32> splat (i32 7))
+; CHECK-NEXT: ret <vscale x 4 x i32> [[R]]
+;
+  %r = call <vscale x 4 x i32> @llvm.aarch64.sve.udivr.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> splat (i32 0), <vscale x 4 x i32> splat (i32 7))
+  ret <vscale x 4 x i32> %r
+}
+
 declare <vscale x 4 x i32> @llvm.aarch64.sve.dup.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, i32)
 
 declare <vscale x 4 x i32> @llvm.aarch64.sve.add.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
@@ -410,8 +504,11 @@ declare <vscale x 4 x i32> @llvm.aarch64.sve.eor.nxv4i32(<vscale x 4 x i1>, <vsc
 declare <vscale x 4 x i32> @llvm.aarch64.sve.mul.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
 declare <vscale x 4 x i32> @llvm.aarch64.sve.orr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
 declare <vscale x 4 x i32> @llvm.aarch64.sve.sdiv.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.sdivr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
 declare <vscale x 4 x i32> @llvm.aarch64.sve.sub.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.subr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
 declare <vscale x 4 x i32> @llvm.aarch64.sve.udiv.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.udivr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
 
 declare <vscale x 4 x i32> @llvm.aarch64.sve.add.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
 declare <vscale x 4 x i32> @llvm.aarch64.sve.and.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
@@ -424,8 +521,10 @@ declare <vscale x 4 x i32> @llvm.aarch64.sve.udiv.u.nxv4i32(<vscale x 4 x i1>, <
 
 declare <vscale x 4 x float> @llvm.aarch64.sve.fadd.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
 declare <vscale x 4 x float> @llvm.aarch64.sve.fdiv.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
+declare <vscale x 4 x float> @llvm.aarch64.sve.fdivr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
 declare <vscale x 4 x float> @llvm.aarch64.sve.fmul.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
 declare <vscale x 4 x float> @llvm.aarch64.sve.fsub.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
+declare <vscale x 4 x float> @llvm.aarch64.sve.fsubr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
 
 declare <vscale x 4 x float> @llvm.aarch64.sve.fadd.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
 declare <vscale x 4 x float> @llvm.aarch64.sve.fdiv.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
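For reference, InstCombine tests like this one are normally driven by a RUN line at the top of the file, outside the hunks shown in this diff. It presumably looks roughly like the following, with FileCheck matching the CHECK lines above:

; RUN: opt -S -passes=instcombine < %s | FileCheck %s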