Skip to content

Commit 352c48f

Browse files
authored
[SelectionDAG] Utilizing target hook convertSelectOfConstantsToMath for SelectwithConstant (#127599)
The target hook convertSelectOfConstantsToMath() needs to be consulted within the SimplifySelectCC helper combine function in SelectionDAG ISel, where a select between constants is generically folded into simple math operations that use the condition value directly. This fixes #121145.
1 parent af68927 commit 352c48f

29 files changed

+2765
-3166
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28194,7 +28194,8 @@ SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
2819428194
if ((Fold || Swap) &&
2819528195
TLI.getBooleanContents(CmpOpVT) ==
2819628196
TargetLowering::ZeroOrOneBooleanContent &&
28197-
(!LegalOperations || TLI.isOperationLegal(ISD::SETCC, CmpOpVT))) {
28197+
(!LegalOperations || TLI.isOperationLegal(ISD::SETCC, CmpOpVT)) &&
28198+
TLI.convertSelectOfConstantsToMath(VT)) {
2819828199

2819928200
if (Swap) {
2820028201
CC = ISD::getSetCCInverse(CC, CmpOpVT);

llvm/test/CodeGen/AArch64/bfis-in-loop.ll

Lines changed: 40 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -13,25 +13,26 @@ target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
1313
define i64 @bfis_in_loop_zero() {
1414
; CHECK-LABEL: bfis_in_loop_zero:
1515
; CHECK: // %bb.0: // %entry
16-
; CHECK-NEXT: adrp x8, :got:global
17-
; CHECK-NEXT: mov x0, xzr
18-
; CHECK-NEXT: mov w9, wzr
19-
; CHECK-NEXT: ldr x8, [x8, :got_lo12:global]
20-
; CHECK-NEXT: ldr x8, [x8]
21-
; CHECK-NEXT: .LBB0_1: // %midblock
22-
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
23-
; CHECK-NEXT: ldrh w10, [x8, #72]
24-
; CHECK-NEXT: ldr x13, [x8, #8]
25-
; CHECK-NEXT: lsr w11, w10, #8
26-
; CHECK-NEXT: cmp w10, #0
27-
; CHECK-NEXT: ldr x8, [x13, #16]
28-
; CHECK-NEXT: cset w12, ne
29-
; CHECK-NEXT: csel w9, w9, w11, eq
30-
; CHECK-NEXT: and x11, x0, #0xffffffff00000000
31-
; CHECK-NEXT: bfi w10, w9, #8, #24
32-
; CHECK-NEXT: orr x11, x11, x12, lsl #16
33-
; CHECK-NEXT: orr x0, x11, x10
34-
; CHECK-NEXT: cbnz x13, .LBB0_1
16+
; CHECK-NEXT: adrp x9, :got:global
17+
; CHECK-NEXT: mov x0, xzr
18+
; CHECK-NEXT: mov w8, wzr
19+
; CHECK-NEXT: ldr x9, [x9, :got_lo12:global]
20+
; CHECK-NEXT: mov w10, #65536 // =0x10000
21+
; CHECK-NEXT: ldr x9, [x9]
22+
; CHECK-NEXT: .LBB0_1: // %midblock
23+
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
24+
; CHECK-NEXT: ldrh w11, [x9, #72]
25+
; CHECK-NEXT: and x13, x0, #0xffffffff00000000
26+
; CHECK-NEXT: lsr w12, w11, #8
27+
; CHECK-NEXT: cmp w11, #0
28+
; CHECK-NEXT: csel w8, w8, w12, eq
29+
; CHECK-NEXT: ldr x12, [x9, #8]
30+
; CHECK-NEXT: csel x9, xzr, x10, eq
31+
; CHECK-NEXT: bfi w11, w8, #8, #24
32+
; CHECK-NEXT: orr x13, x9, x13
33+
; CHECK-NEXT: ldr x9, [x12, #16]
34+
; CHECK-NEXT: orr x0, x13, x11
35+
; CHECK-NEXT: cbnz x12, .LBB0_1
3536
; CHECK-NEXT: // %bb.2: // %exit
3637
; CHECK-NEXT: ret
3738
entry:
@@ -80,25 +81,26 @@ exit:
8081
define i64 @bfis_in_loop_undef() {
8182
; CHECK-LABEL: bfis_in_loop_undef:
8283
; CHECK: // %bb.0: // %entry
83-
; CHECK-NEXT: adrp x9, :got:global
84-
; CHECK-NEXT: mov w8, wzr
85-
; CHECK-NEXT: // implicit-def: $x0
86-
; CHECK-NEXT: ldr x9, [x9, :got_lo12:global]
87-
; CHECK-NEXT: ldr x9, [x9]
88-
; CHECK-NEXT: .LBB1_1: // %midblock
89-
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
90-
; CHECK-NEXT: ldrh w10, [x9, #72]
91-
; CHECK-NEXT: ldr x13, [x9, #8]
92-
; CHECK-NEXT: lsr w11, w10, #8
93-
; CHECK-NEXT: cmp w10, #0
94-
; CHECK-NEXT: ldr x9, [x13, #16]
95-
; CHECK-NEXT: cset w12, ne
96-
; CHECK-NEXT: csel w8, w8, w11, eq
97-
; CHECK-NEXT: and x11, x0, #0xffffffff00000000
98-
; CHECK-NEXT: bfi w10, w8, #8, #24
99-
; CHECK-NEXT: orr x11, x11, x12, lsl #16
100-
; CHECK-NEXT: orr x0, x11, x10
101-
; CHECK-NEXT: cbnz x13, .LBB1_1
84+
; CHECK-NEXT: adrp x9, :got:global
85+
; CHECK-NEXT: mov w8, wzr
86+
; CHECK-NEXT: // implicit-def: $x0
87+
; CHECK-NEXT: ldr x9, [x9, :got_lo12:global]
88+
; CHECK-NEXT: ldr x10, [x9]
89+
; CHECK-NEXT: mov w9, #65536 // =0x10000
90+
; CHECK-NEXT: .LBB1_1: // %midblock
91+
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
92+
; CHECK-NEXT: ldrh w11, [x10, #72]
93+
; CHECK-NEXT: and x13, x0, #0xffffffff00000000
94+
; CHECK-NEXT: lsr w12, w11, #8
95+
; CHECK-NEXT: cmp w11, #0
96+
; CHECK-NEXT: csel w8, w8, w12, eq
97+
; CHECK-NEXT: ldr x12, [x10, #8]
98+
; CHECK-NEXT: csel x10, xzr, x9, eq
99+
; CHECK-NEXT: bfi w11, w8, #8, #24
100+
; CHECK-NEXT: orr x13, x10, x13
101+
; CHECK-NEXT: ldr x10, [x12, #16]
102+
; CHECK-NEXT: orr x0, x13, x11
103+
; CHECK-NEXT: cbnz x12, .LBB1_1
102104
; CHECK-NEXT: // %bb.2: // %exit
103105
; CHECK-NEXT: ret
104106
entry:

llvm/test/CodeGen/AArch64/select_cc.ll

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,8 @@ define i64 @select_ogt_float(float %a, float %b) {
66
; CHECK-SD-LABEL: select_ogt_float:
77
; CHECK-SD: // %bb.0: // %entry
88
; CHECK-SD-NEXT: fcmp s0, s1
9-
; CHECK-SD-NEXT: cset w8, gt
10-
; CHECK-SD-NEXT: ubfiz x0, x8, #2, #32
9+
; CHECK-SD-NEXT: mov w8, #4 // =0x4
10+
; CHECK-SD-NEXT: csel x0, x8, xzr, gt
1111
; CHECK-SD-NEXT: ret
1212
;
1313
; CHECK-GI-LABEL: select_ogt_float:
@@ -26,8 +26,8 @@ define i64 @select_ule_float_inverse(float %a, float %b) {
2626
; CHECK-SD-LABEL: select_ule_float_inverse:
2727
; CHECK-SD: // %bb.0: // %entry
2828
; CHECK-SD-NEXT: fcmp s0, s1
29-
; CHECK-SD-NEXT: cset w8, gt
30-
; CHECK-SD-NEXT: ubfiz x0, x8, #2, #32
29+
; CHECK-SD-NEXT: mov w8, #4 // =0x4
30+
; CHECK-SD-NEXT: csel x0, xzr, x8, le
3131
; CHECK-SD-NEXT: ret
3232
;
3333
; CHECK-GI-LABEL: select_ule_float_inverse:
@@ -45,9 +45,9 @@ entry:
4545
define i64 @select_eq_i32(i32 %a, i32 %b) {
4646
; CHECK-SD-LABEL: select_eq_i32:
4747
; CHECK-SD: // %bb.0: // %entry
48+
; CHECK-SD-NEXT: mov w8, #4 // =0x4
4849
; CHECK-SD-NEXT: cmp w0, w1
49-
; CHECK-SD-NEXT: cset w8, eq
50-
; CHECK-SD-NEXT: ubfiz x0, x8, #2, #32
50+
; CHECK-SD-NEXT: csel x0, x8, xzr, eq
5151
; CHECK-SD-NEXT: ret
5252
;
5353
; CHECK-GI-LABEL: select_eq_i32:
@@ -65,9 +65,9 @@ entry:
6565
define i64 @select_ne_i32_inverse(i32 %a, i32 %b) {
6666
; CHECK-SD-LABEL: select_ne_i32_inverse:
6767
; CHECK-SD: // %bb.0: // %entry
68+
; CHECK-SD-NEXT: mov w8, #4 // =0x4
6869
; CHECK-SD-NEXT: cmp w0, w1
69-
; CHECK-SD-NEXT: cset w8, eq
70-
; CHECK-SD-NEXT: ubfiz x0, x8, #2, #32
70+
; CHECK-SD-NEXT: csel x0, xzr, x8, ne
7171
; CHECK-SD-NEXT: ret
7272
;
7373
; CHECK-GI-LABEL: select_ne_i32_inverse:

llvm/test/CodeGen/AArch64/selectopt-const.ll

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -13,24 +13,24 @@ define i32 @test_const(ptr %in1, ptr %in2, ptr %out, i32 %n, ptr %tbl) {
1313
; CHECK-NEXT: mov w8, w3
1414
; CHECK-NEXT: movk w9, #16309, lsl #16
1515
; CHECK-NEXT: fmov s0, w9
16+
; CHECK-NEXT: mov w9, #16 // =0x10
1617
; CHECK-NEXT: .p2align 5, , 16
1718
; CHECK-NEXT: .LBB0_2: // %for.body
1819
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
1920
; CHECK-NEXT: ldr s4, [x1], #4
20-
; CHECK-NEXT: ldr w9, [x0], #4
21-
; CHECK-NEXT: add w9, w9, #10
22-
; CHECK-NEXT: scvtf d3, w9
21+
; CHECK-NEXT: ldr w10, [x0], #4
22+
; CHECK-NEXT: add w10, w10, #10
23+
; CHECK-NEXT: scvtf d3, w10
2324
; CHECK-NEXT: fmadd s4, s4, s0, s1
2425
; CHECK-NEXT: fabs s4, s4
2526
; CHECK-NEXT: fcvt d4, s4
2627
; CHECK-NEXT: fdiv d3, d3, d4
2728
; CHECK-NEXT: fcmp d3, d2
28-
; CHECK-NEXT: cset w9, lt
29+
; CHECK-NEXT: csel x10, x9, xzr, lt
2930
; CHECK-NEXT: subs x8, x8, #1
30-
; CHECK-NEXT: ubfiz x9, x9, #4, #32
31-
; CHECK-NEXT: ldr s3, [x4, x9]
32-
; CHECK-NEXT: fcvtzs w9, s3
33-
; CHECK-NEXT: str w9, [x2], #4
31+
; CHECK-NEXT: ldr s3, [x4, x10]
32+
; CHECK-NEXT: fcvtzs w10, s3
33+
; CHECK-NEXT: str w10, [x2], #4
3434
; CHECK-NEXT: b.ne .LBB0_2
3535
; CHECK-NEXT: .LBB0_3: // %for.cond.cleanup
3636
; CHECK-NEXT: mov w0, wzr

llvm/test/CodeGen/AMDGPU/amdgcn.private-memory.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,8 @@ declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
1919
; GCN-ALLOCA: v_add_{{[iu]}}32_e32 [[RESULT:v[0-9]+]], vcc, v{{[0-9]+}}, v0
2020

2121
; GCN-PROMOTE: s_cmp_eq_u32 [[IN]], 1
22-
; GCN-PROMOTE-NEXT: s_cselect_b64 vcc, -1, 0
23-
; GCN-PROMOTE-NEXT: v_addc_u32_e32 [[RESULT:v[0-9]+]], vcc, 0, v0, vcc
22+
; GCN-PROMOTE-NEXT: s_cselect_b32 [[SCC:s[0-9]+]], 1, 0
23+
; GCN-PROMOTE-NEXT: v_add_{{[iu]}}32_e32 [[RESULT:v[0-9]+]], vcc, [[SCC]], v0
2424

2525
; GCN: buffer_store_dword [[RESULT]]
2626
define amdgpu_kernel void @work_item_info(ptr addrspace(1) %out, i32 %in) {

llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow-codegen.ll

Lines changed: 7 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -82,17 +82,16 @@ define float @test_pow_fast_f32__integral_y(float %x, i32 %y.i) {
8282
; CHECK-NEXT: v_cvt_f32_i32_e32 v1, v1
8383
; CHECK-NEXT: s_mov_b32 s4, 0x800000
8484
; CHECK-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4
85-
; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
85+
; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 32, vcc
8686
; CHECK-NEXT: v_cvt_i32_f32_e32 v1, v1
87-
; CHECK-NEXT: v_lshlrev_b32_e32 v3, 5, v3
8887
; CHECK-NEXT: v_ldexp_f32 v3, |v0|, v3
8988
; CHECK-NEXT: v_log_f32_e32 v3, v3
90-
; CHECK-NEXT: v_cvt_f32_i32_e32 v4, v1
9189
; CHECK-NEXT: v_mov_b32_e32 v2, 0x42000000
90+
; CHECK-NEXT: v_cvt_f32_i32_e32 v4, v1
9291
; CHECK-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
9392
; CHECK-NEXT: v_sub_f32_e32 v2, v3, v2
94-
; CHECK-NEXT: v_mul_f32_e32 v3, v2, v4
9593
; CHECK-NEXT: s_mov_b32 s4, 0xc2fc0000
94+
; CHECK-NEXT: v_mul_f32_e32 v3, v2, v4
9695
; CHECK-NEXT: v_mov_b32_e32 v5, 0x42800000
9796
; CHECK-NEXT: v_cmp_gt_f32_e32 vcc, s4, v3
9897
; CHECK-NEXT: v_cndmask_b32_e32 v3, 0, v5, vcc
@@ -228,8 +227,7 @@ define float @test_powr_fast_f32(float %x, float %y) {
228227
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
229228
; CHECK-NEXT: s_mov_b32 s4, 0x800000
230229
; CHECK-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
231-
; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
232-
; CHECK-NEXT: v_lshlrev_b32_e32 v3, 5, v3
230+
; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 32, vcc
233231
; CHECK-NEXT: v_ldexp_f32 v0, v0, v3
234232
; CHECK-NEXT: v_log_f32_e32 v0, v0
235233
; CHECK-NEXT: v_mov_b32_e32 v2, 0x42000000
@@ -368,8 +366,7 @@ define float @test_pown_fast_f32(float %x, i32 %y) {
368366
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
369367
; CHECK-NEXT: s_mov_b32 s4, 0x800000
370368
; CHECK-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4
371-
; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
372-
; CHECK-NEXT: v_lshlrev_b32_e32 v3, 5, v3
369+
; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 32, vcc
373370
; CHECK-NEXT: v_ldexp_f32 v3, |v0|, v3
374371
; CHECK-NEXT: v_log_f32_e32 v3, v3
375372
; CHECK-NEXT: v_cvt_f32_i32_e32 v4, v1
@@ -511,8 +508,7 @@ define float @test_pown_fast_f32_known_even(float %x, i32 %y.arg) {
511508
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
512509
; CHECK-NEXT: s_mov_b32 s4, 0x800000
513510
; CHECK-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4
514-
; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
515-
; CHECK-NEXT: v_lshlrev_b32_e32 v3, 5, v3
511+
; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 32, vcc
516512
; CHECK-NEXT: v_ldexp_f32 v0, |v0|, v3
517513
; CHECK-NEXT: v_lshlrev_b32_e32 v1, 1, v1
518514
; CHECK-NEXT: v_log_f32_e32 v0, v0
@@ -651,8 +647,7 @@ define float @test_pown_fast_f32_known_odd(float %x, i32 %y.arg) {
651647
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
652648
; CHECK-NEXT: s_mov_b32 s4, 0x800000
653649
; CHECK-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4
654-
; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
655-
; CHECK-NEXT: v_lshlrev_b32_e32 v3, 5, v3
650+
; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 32, vcc
656651
; CHECK-NEXT: v_ldexp_f32 v3, |v0|, v3
657652
; CHECK-NEXT: v_or_b32_e32 v1, 1, v1
658653
; CHECK-NEXT: v_log_f32_e32 v3, v3

0 commit comments

Comments
 (0)