Skip to content

Commit 355c2e3

Browse files
committed
[AMDGPU][AArch64][SelectionDAG] Added target hook check for Select folding.
The target hook convertSelectOfConstantsToMath() needs to be checked within the SimplifySelectCC helper combine function in SelectionDAG ISel, where a select of constants is generically folded into simple math ops that use the condition value as is. For AArch64, based on the selectWithConstant LIT tests, it appears beneficial to have convertSelectOfConstantsToMath() return true instead of the default value. This fixes perf regression #121145.
1 parent 6e53270 commit 355c2e3

39 files changed

+2881
-3314
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28189,7 +28189,7 @@ SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
2818928189
bool Fold = N2C && isNullConstant(N3) && N2C->getAPIntValue().isPowerOf2();
2819028190
bool Swap = N3C && isNullConstant(N2) && N3C->getAPIntValue().isPowerOf2();
2819128191

28192-
if ((Fold || Swap) &&
28192+
if (TLI.convertSelectOfConstantsToMath(VT) && (Fold || Swap) &&
2819328193
TLI.getBooleanContents(CmpOpVT) ==
2819428194
TargetLowering::ZeroOrOneBooleanContent &&
2819528195
(!LegalOperations || TLI.isOperationLegal(ISD::SETCC, CmpOpVT))) {

llvm/lib/Target/AArch64/AArch64ISelLowering.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -796,6 +796,8 @@ class AArch64TargetLowering : public TargetLowering {
796796
bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
797797
Type *Ty) const override;
798798

799+
/// Opt in, for every value type, to turning a select of constants into
/// math on the condition value. DAGCombiner::SimplifySelectCC checks this
/// hook before folding a select between a power-of-2 constant and zero.
/// NOTE(review): the generic TargetLowering default is presumably false;
/// confirm against the base class declaration.
bool convertSelectOfConstantsToMath(EVT VT) const override { return true; }
800+
799801
/// Return true if EXTRACT_SUBVECTOR is cheap for this result type
800802
/// with this index.
801803
bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,

llvm/test/CodeGen/AArch64/arm64-ccmp.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -501,8 +501,8 @@ define float @select_or_float(i32 %w0, i32 %w1, float %x2, float %x3) {
501501
define i64 @gccbug(i64 %x0, i64 %x1) {
502502
; SDISEL-LABEL: gccbug:
503503
; SDISEL: ; %bb.0:
504-
; SDISEL-NEXT: cmp x0, #2
505-
; SDISEL-NEXT: ccmp x0, #4, #4, ne
504+
; SDISEL-NEXT: cmp x0, #4
505+
; SDISEL-NEXT: ccmp x0, #2, #4, ne
506506
; SDISEL-NEXT: ccmp x1, #0, #0, eq
507507
; SDISEL-NEXT: mov w8, #1 ; =0x1
508508
; SDISEL-NEXT: cinc x0, x8, eq

llvm/test/CodeGen/AArch64/arm64-csel.ll

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -295,9 +295,8 @@ entry:
295295
define i64 @foo18_overflow3(i1 %cmp) nounwind readnone optsize ssp {
296296
; CHECK-LABEL: foo18_overflow3:
297297
; CHECK: // %bb.0: // %entry
298-
; CHECK-NEXT: mov x8, #-9223372036854775808 // =0x8000000000000000
299-
; CHECK-NEXT: tst w0, #0x1
300-
; CHECK-NEXT: csel x0, x8, xzr, ne
298+
; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
299+
; CHECK-NEXT: lsl x0, x0, #63
301300
; CHECK-NEXT: ret
302301
entry:
303302
%. = select i1 %cmp, i64 -9223372036854775808, i64 0

llvm/test/CodeGen/AArch64/arm64-zip.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -455,7 +455,7 @@ define <4 x i32> @shuffle_zip3(<4 x i32> %arg) {
455455
; CHECK-NEXT: zip2.4h v0, v0, v1
456456
; CHECK-NEXT: movi.4s v1, #1
457457
; CHECK-NEXT: zip1.4h v0, v0, v0
458-
; CHECK-NEXT: sshll.4s v0, v0, #0
458+
; CHECK-NEXT: ushll.4s v0, v0, #0
459459
; CHECK-NEXT: and.16b v0, v0, v1
460460
; CHECK-NEXT: ret
461461
bb:

llvm/test/CodeGen/AArch64/cmp-select-sign.ll

Lines changed: 8 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -241,18 +241,14 @@ define <4 x i32> @not_sign_4xi32_3(<4 x i32> %a) {
241241
define <4 x i65> @sign_4xi65(<4 x i65> %a) {
242242
; CHECK-LABEL: sign_4xi65:
243243
; CHECK: // %bb.0:
244-
; CHECK-NEXT: sbfx x8, x5, #0, #1
245-
; CHECK-NEXT: sbfx x9, x3, #0, #1
246-
; CHECK-NEXT: sbfx x10, x1, #0, #1
247-
; CHECK-NEXT: sbfx x11, x7, #0, #1
248-
; CHECK-NEXT: lsr x1, x10, #63
249-
; CHECK-NEXT: lsr x3, x9, #63
250-
; CHECK-NEXT: lsr x5, x8, #63
251-
; CHECK-NEXT: lsr x7, x11, #63
252-
; CHECK-NEXT: orr x0, x10, #0x1
253-
; CHECK-NEXT: orr x2, x9, #0x1
254-
; CHECK-NEXT: orr x4, x8, #0x1
255-
; CHECK-NEXT: orr x6, x11, #0x1
244+
; CHECK-NEXT: sbfx x3, x3, #0, #1
245+
; CHECK-NEXT: sbfx x1, x1, #0, #1
246+
; CHECK-NEXT: sbfx x7, x7, #0, #1
247+
; CHECK-NEXT: sbfx x5, x5, #0, #1
248+
; CHECK-NEXT: orr x0, x1, #0x1
249+
; CHECK-NEXT: orr x2, x3, #0x1
250+
; CHECK-NEXT: orr x6, x7, #0x1
251+
; CHECK-NEXT: orr x4, x5, #0x1
256252
; CHECK-NEXT: ret
257253
%c = icmp sgt <4 x i65> %a, <i65 -1, i65 -1, i65 -1, i65 -1>
258254
%res = select <4 x i1> %c, <4 x i65> <i65 1, i65 1, i65 1, i65 1>, <4 x i65 > <i65 -1, i65 -1, i65 -1, i65 -1>

llvm/test/CodeGen/AArch64/fold-int-pow2-with-fmul-or-fdiv.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -184,10 +184,10 @@ define double @fmul_pow_shl_cnt2(i64 %cnt) nounwind {
184184
define float @fmul_pow_select(i32 %cnt, i1 %c) nounwind {
185185
; CHECK-LABEL: fmul_pow_select:
186186
; CHECK: // %bb.0:
187-
; CHECK-NEXT: mov w8, #1 // =0x1
188-
; CHECK-NEXT: tst w1, #0x1
187+
; CHECK-NEXT: mov w8, #2 // =0x2
188+
; CHECK-NEXT: and w9, w1, #0x1
189189
; CHECK-NEXT: fmov s1, #9.00000000
190-
; CHECK-NEXT: cinc w8, w8, eq
190+
; CHECK-NEXT: sub w8, w8, w9
191191
; CHECK-NEXT: lsl w8, w8, w0
192192
; CHECK-NEXT: ucvtf s0, w8
193193
; CHECK-NEXT: fmul s0, s0, s1

llvm/test/CodeGen/AArch64/i128-math.ll

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -457,17 +457,19 @@ define i128 @i128_saturating_mul(i128 %x, i128 %y) {
457457
; CHECK-NEXT: adc x10, x13, x14
458458
; CHECK-NEXT: adds x8, x11, x8
459459
; CHECK-NEXT: asr x11, x9, #63
460-
; CHECK-NEXT: mul x13, x0, x2
460+
; CHECK-NEXT: eor x13, x3, x1
461+
; CHECK-NEXT: mul x14, x0, x2
461462
; CHECK-NEXT: adc x10, x12, x10
462-
; CHECK-NEXT: eor x12, x3, x1
463+
; CHECK-NEXT: lsr x12, x13, #63
463464
; CHECK-NEXT: eor x8, x8, x11
464465
; CHECK-NEXT: eor x10, x10, x11
465-
; CHECK-NEXT: asr x11, x12, #63
466+
; CHECK-NEXT: mov x11, #9223372036854775807 // =0x7fffffffffffffff
466467
; CHECK-NEXT: orr x8, x8, x10
467-
; CHECK-NEXT: eor x10, x11, #0x7fffffffffffffff
468+
; CHECK-NEXT: subs x10, x12, #1
469+
; CHECK-NEXT: adc x11, xzr, x11
468470
; CHECK-NEXT: cmp x8, #0
469-
; CHECK-NEXT: csinv x0, x13, x11, eq
470-
; CHECK-NEXT: csel x1, x10, x9, ne
471+
; CHECK-NEXT: csel x0, x10, x14, ne
472+
; CHECK-NEXT: csel x1, x11, x9, ne
471473
; CHECK-NEXT: ret
472474
%1 = tail call { i128, i1 } @llvm.smul.with.overflow.i128(i128 %x, i128 %y)
473475
%2 = extractvalue { i128, i1 } %1, 0

llvm/test/CodeGen/AArch64/midpoint-int.ll

Lines changed: 90 additions & 80 deletions
Original file line numberDiff line numberDiff line change
@@ -271,14 +271,15 @@ define i64 @scalar_i64_signed_mem_mem(ptr %a1_addr, ptr %a2_addr) nounwind {
271271
define i16 @scalar_i16_signed_reg_reg(i16 %a1, i16 %a2) nounwind {
272272
; CHECK-LABEL: scalar_i16_signed_reg_reg:
273273
; CHECK: // %bb.0:
274-
; CHECK-NEXT: sxth w9, w1
275-
; CHECK-NEXT: sxth w10, w0
276-
; CHECK-NEXT: mov w8, #-1 // =0xffffffff
277-
; CHECK-NEXT: subs w9, w10, w9
278-
; CHECK-NEXT: cneg w9, w9, mi
279-
; CHECK-NEXT: cneg w8, w8, le
280-
; CHECK-NEXT: lsr w9, w9, #1
281-
; CHECK-NEXT: madd w0, w9, w8, w0
274+
; CHECK-NEXT: sxth w8, w1
275+
; CHECK-NEXT: sxth w9, w0
276+
; CHECK-NEXT: subs w8, w9, w8
277+
; CHECK-NEXT: cset w9, gt
278+
; CHECK-NEXT: cneg w8, w8, mi
279+
; CHECK-NEXT: sbfx w9, w9, #0, #1
280+
; CHECK-NEXT: lsr w8, w8, #1
281+
; CHECK-NEXT: orr w9, w9, #0x1
282+
; CHECK-NEXT: madd w0, w8, w9, w0
282283
; CHECK-NEXT: ret
283284
%t3 = icmp sgt i16 %a1, %a2 ; signed
284285
%t4 = select i1 %t3, i16 -1, i16 1
@@ -294,14 +295,15 @@ define i16 @scalar_i16_signed_reg_reg(i16 %a1, i16 %a2) nounwind {
294295
define i16 @scalar_i16_unsigned_reg_reg(i16 %a1, i16 %a2) nounwind {
295296
; CHECK-LABEL: scalar_i16_unsigned_reg_reg:
296297
; CHECK: // %bb.0:
297-
; CHECK-NEXT: and w9, w1, #0xffff
298-
; CHECK-NEXT: and w10, w0, #0xffff
299-
; CHECK-NEXT: mov w8, #-1 // =0xffffffff
300-
; CHECK-NEXT: subs w9, w10, w9
301-
; CHECK-NEXT: cneg w9, w9, mi
302-
; CHECK-NEXT: cneg w8, w8, ls
303-
; CHECK-NEXT: lsr w9, w9, #1
304-
; CHECK-NEXT: madd w0, w9, w8, w0
298+
; CHECK-NEXT: and w8, w1, #0xffff
299+
; CHECK-NEXT: and w9, w0, #0xffff
300+
; CHECK-NEXT: subs w8, w9, w8
301+
; CHECK-NEXT: cset w9, hi
302+
; CHECK-NEXT: cneg w8, w8, mi
303+
; CHECK-NEXT: sbfx w9, w9, #0, #1
304+
; CHECK-NEXT: lsr w8, w8, #1
305+
; CHECK-NEXT: orr w9, w9, #0x1
306+
; CHECK-NEXT: madd w0, w8, w9, w0
305307
; CHECK-NEXT: ret
306308
%t3 = icmp ugt i16 %a1, %a2
307309
%t4 = select i1 %t3, i16 -1, i16 1
@@ -319,14 +321,15 @@ define i16 @scalar_i16_unsigned_reg_reg(i16 %a1, i16 %a2) nounwind {
319321
define i16 @scalar_i16_signed_mem_reg(ptr %a1_addr, i16 %a2) nounwind {
320322
; CHECK-LABEL: scalar_i16_signed_mem_reg:
321323
; CHECK: // %bb.0:
322-
; CHECK-NEXT: sxth w9, w1
323-
; CHECK-NEXT: ldrsh w10, [x0]
324-
; CHECK-NEXT: mov w8, #-1 // =0xffffffff
325-
; CHECK-NEXT: subs w9, w10, w9
326-
; CHECK-NEXT: cneg w9, w9, mi
327-
; CHECK-NEXT: cneg w8, w8, le
328-
; CHECK-NEXT: lsr w9, w9, #1
329-
; CHECK-NEXT: madd w0, w9, w8, w10
324+
; CHECK-NEXT: sxth w8, w1
325+
; CHECK-NEXT: ldrsh w9, [x0]
326+
; CHECK-NEXT: subs w8, w9, w8
327+
; CHECK-NEXT: cset w10, gt
328+
; CHECK-NEXT: cneg w8, w8, mi
329+
; CHECK-NEXT: sbfx w10, w10, #0, #1
330+
; CHECK-NEXT: lsr w8, w8, #1
331+
; CHECK-NEXT: orr w10, w10, #0x1
332+
; CHECK-NEXT: madd w0, w8, w10, w9
330333
; CHECK-NEXT: ret
331334
%a1 = load i16, ptr %a1_addr
332335
%t3 = icmp sgt i16 %a1, %a2 ; signed
@@ -343,14 +346,15 @@ define i16 @scalar_i16_signed_mem_reg(ptr %a1_addr, i16 %a2) nounwind {
343346
define i16 @scalar_i16_signed_reg_mem(i16 %a1, ptr %a2_addr) nounwind {
344347
; CHECK-LABEL: scalar_i16_signed_reg_mem:
345348
; CHECK: // %bb.0:
346-
; CHECK-NEXT: sxth w9, w0
347-
; CHECK-NEXT: ldrsh w10, [x1]
348-
; CHECK-NEXT: mov w8, #-1 // =0xffffffff
349-
; CHECK-NEXT: subs w9, w9, w10
350-
; CHECK-NEXT: cneg w9, w9, mi
351-
; CHECK-NEXT: cneg w8, w8, le
352-
; CHECK-NEXT: lsr w9, w9, #1
353-
; CHECK-NEXT: madd w0, w9, w8, w0
349+
; CHECK-NEXT: sxth w8, w0
350+
; CHECK-NEXT: ldrsh w9, [x1]
351+
; CHECK-NEXT: subs w8, w8, w9
352+
; CHECK-NEXT: cset w9, gt
353+
; CHECK-NEXT: cneg w8, w8, mi
354+
; CHECK-NEXT: sbfx w9, w9, #0, #1
355+
; CHECK-NEXT: lsr w8, w8, #1
356+
; CHECK-NEXT: orr w9, w9, #0x1
357+
; CHECK-NEXT: madd w0, w8, w9, w0
354358
; CHECK-NEXT: ret
355359
%a2 = load i16, ptr %a2_addr
356360
%t3 = icmp sgt i16 %a1, %a2 ; signed
@@ -367,14 +371,15 @@ define i16 @scalar_i16_signed_reg_mem(i16 %a1, ptr %a2_addr) nounwind {
367371
define i16 @scalar_i16_signed_mem_mem(ptr %a1_addr, ptr %a2_addr) nounwind {
368372
; CHECK-LABEL: scalar_i16_signed_mem_mem:
369373
; CHECK: // %bb.0:
370-
; CHECK-NEXT: ldrsh w9, [x0]
371-
; CHECK-NEXT: ldrsh w10, [x1]
372-
; CHECK-NEXT: mov w8, #-1 // =0xffffffff
373-
; CHECK-NEXT: subs w10, w9, w10
374-
; CHECK-NEXT: cneg w10, w10, mi
375-
; CHECK-NEXT: cneg w8, w8, le
376-
; CHECK-NEXT: lsr w10, w10, #1
377-
; CHECK-NEXT: madd w0, w10, w8, w9
374+
; CHECK-NEXT: ldrsh w8, [x0]
375+
; CHECK-NEXT: ldrsh w9, [x1]
376+
; CHECK-NEXT: subs w9, w8, w9
377+
; CHECK-NEXT: cset w10, gt
378+
; CHECK-NEXT: cneg w9, w9, mi
379+
; CHECK-NEXT: sbfx w10, w10, #0, #1
380+
; CHECK-NEXT: lsr w9, w9, #1
381+
; CHECK-NEXT: orr w10, w10, #0x1
382+
; CHECK-NEXT: madd w0, w9, w10, w8
378383
; CHECK-NEXT: ret
379384
%a1 = load i16, ptr %a1_addr
380385
%a2 = load i16, ptr %a2_addr
@@ -398,14 +403,15 @@ define i16 @scalar_i16_signed_mem_mem(ptr %a1_addr, ptr %a2_addr) nounwind {
398403
define i8 @scalar_i8_signed_reg_reg(i8 %a1, i8 %a2) nounwind {
399404
; CHECK-LABEL: scalar_i8_signed_reg_reg:
400405
; CHECK: // %bb.0:
401-
; CHECK-NEXT: sxtb w9, w1
402-
; CHECK-NEXT: sxtb w10, w0
403-
; CHECK-NEXT: mov w8, #-1 // =0xffffffff
404-
; CHECK-NEXT: subs w9, w10, w9
405-
; CHECK-NEXT: cneg w9, w9, mi
406-
; CHECK-NEXT: cneg w8, w8, le
407-
; CHECK-NEXT: lsr w9, w9, #1
408-
; CHECK-NEXT: madd w0, w9, w8, w0
406+
; CHECK-NEXT: sxtb w8, w1
407+
; CHECK-NEXT: sxtb w9, w0
408+
; CHECK-NEXT: subs w8, w9, w8
409+
; CHECK-NEXT: cset w9, gt
410+
; CHECK-NEXT: cneg w8, w8, mi
411+
; CHECK-NEXT: sbfx w9, w9, #0, #1
412+
; CHECK-NEXT: lsr w8, w8, #1
413+
; CHECK-NEXT: orr w9, w9, #0x1
414+
; CHECK-NEXT: madd w0, w8, w9, w0
409415
; CHECK-NEXT: ret
410416
%t3 = icmp sgt i8 %a1, %a2 ; signed
411417
%t4 = select i1 %t3, i8 -1, i8 1
@@ -421,14 +427,15 @@ define i8 @scalar_i8_signed_reg_reg(i8 %a1, i8 %a2) nounwind {
421427
define i8 @scalar_i8_unsigned_reg_reg(i8 %a1, i8 %a2) nounwind {
422428
; CHECK-LABEL: scalar_i8_unsigned_reg_reg:
423429
; CHECK: // %bb.0:
424-
; CHECK-NEXT: and w9, w1, #0xff
425-
; CHECK-NEXT: and w10, w0, #0xff
426-
; CHECK-NEXT: mov w8, #-1 // =0xffffffff
427-
; CHECK-NEXT: subs w9, w10, w9
428-
; CHECK-NEXT: cneg w9, w9, mi
429-
; CHECK-NEXT: cneg w8, w8, ls
430-
; CHECK-NEXT: lsr w9, w9, #1
431-
; CHECK-NEXT: madd w0, w9, w8, w0
430+
; CHECK-NEXT: and w8, w1, #0xff
431+
; CHECK-NEXT: and w9, w0, #0xff
432+
; CHECK-NEXT: subs w8, w9, w8
433+
; CHECK-NEXT: cset w9, hi
434+
; CHECK-NEXT: cneg w8, w8, mi
435+
; CHECK-NEXT: sbfx w9, w9, #0, #1
436+
; CHECK-NEXT: lsr w8, w8, #1
437+
; CHECK-NEXT: orr w9, w9, #0x1
438+
; CHECK-NEXT: madd w0, w8, w9, w0
432439
; CHECK-NEXT: ret
433440
%t3 = icmp ugt i8 %a1, %a2
434441
%t4 = select i1 %t3, i8 -1, i8 1
@@ -446,14 +453,15 @@ define i8 @scalar_i8_unsigned_reg_reg(i8 %a1, i8 %a2) nounwind {
446453
define i8 @scalar_i8_signed_mem_reg(ptr %a1_addr, i8 %a2) nounwind {
447454
; CHECK-LABEL: scalar_i8_signed_mem_reg:
448455
; CHECK: // %bb.0:
449-
; CHECK-NEXT: sxtb w9, w1
450-
; CHECK-NEXT: ldrsb w10, [x0]
451-
; CHECK-NEXT: mov w8, #-1 // =0xffffffff
452-
; CHECK-NEXT: subs w9, w10, w9
453-
; CHECK-NEXT: cneg w9, w9, mi
454-
; CHECK-NEXT: cneg w8, w8, le
455-
; CHECK-NEXT: lsr w9, w9, #1
456-
; CHECK-NEXT: madd w0, w9, w8, w10
456+
; CHECK-NEXT: sxtb w8, w1
457+
; CHECK-NEXT: ldrsb w9, [x0]
458+
; CHECK-NEXT: subs w8, w9, w8
459+
; CHECK-NEXT: cset w10, gt
460+
; CHECK-NEXT: cneg w8, w8, mi
461+
; CHECK-NEXT: sbfx w10, w10, #0, #1
462+
; CHECK-NEXT: lsr w8, w8, #1
463+
; CHECK-NEXT: orr w10, w10, #0x1
464+
; CHECK-NEXT: madd w0, w8, w10, w9
457465
; CHECK-NEXT: ret
458466
%a1 = load i8, ptr %a1_addr
459467
%t3 = icmp sgt i8 %a1, %a2 ; signed
@@ -470,14 +478,15 @@ define i8 @scalar_i8_signed_mem_reg(ptr %a1_addr, i8 %a2) nounwind {
470478
define i8 @scalar_i8_signed_reg_mem(i8 %a1, ptr %a2_addr) nounwind {
471479
; CHECK-LABEL: scalar_i8_signed_reg_mem:
472480
; CHECK: // %bb.0:
473-
; CHECK-NEXT: sxtb w9, w0
474-
; CHECK-NEXT: ldrsb w10, [x1]
475-
; CHECK-NEXT: mov w8, #-1 // =0xffffffff
476-
; CHECK-NEXT: subs w9, w9, w10
477-
; CHECK-NEXT: cneg w9, w9, mi
478-
; CHECK-NEXT: cneg w8, w8, le
479-
; CHECK-NEXT: lsr w9, w9, #1
480-
; CHECK-NEXT: madd w0, w9, w8, w0
481+
; CHECK-NEXT: sxtb w8, w0
482+
; CHECK-NEXT: ldrsb w9, [x1]
483+
; CHECK-NEXT: subs w8, w8, w9
484+
; CHECK-NEXT: cset w9, gt
485+
; CHECK-NEXT: cneg w8, w8, mi
486+
; CHECK-NEXT: sbfx w9, w9, #0, #1
487+
; CHECK-NEXT: lsr w8, w8, #1
488+
; CHECK-NEXT: orr w9, w9, #0x1
489+
; CHECK-NEXT: madd w0, w8, w9, w0
481490
; CHECK-NEXT: ret
482491
%a2 = load i8, ptr %a2_addr
483492
%t3 = icmp sgt i8 %a1, %a2 ; signed
@@ -494,14 +503,15 @@ define i8 @scalar_i8_signed_reg_mem(i8 %a1, ptr %a2_addr) nounwind {
494503
define i8 @scalar_i8_signed_mem_mem(ptr %a1_addr, ptr %a2_addr) nounwind {
495504
; CHECK-LABEL: scalar_i8_signed_mem_mem:
496505
; CHECK: // %bb.0:
497-
; CHECK-NEXT: ldrsb w9, [x0]
498-
; CHECK-NEXT: ldrsb w10, [x1]
499-
; CHECK-NEXT: mov w8, #-1 // =0xffffffff
500-
; CHECK-NEXT: subs w10, w9, w10
501-
; CHECK-NEXT: cneg w10, w10, mi
502-
; CHECK-NEXT: cneg w8, w8, le
503-
; CHECK-NEXT: lsr w10, w10, #1
504-
; CHECK-NEXT: madd w0, w10, w8, w9
506+
; CHECK-NEXT: ldrsb w8, [x0]
507+
; CHECK-NEXT: ldrsb w9, [x1]
508+
; CHECK-NEXT: subs w9, w8, w9
509+
; CHECK-NEXT: cset w10, gt
510+
; CHECK-NEXT: cneg w9, w9, mi
511+
; CHECK-NEXT: sbfx w10, w10, #0, #1
512+
; CHECK-NEXT: lsr w9, w9, #1
513+
; CHECK-NEXT: orr w10, w10, #0x1
514+
; CHECK-NEXT: madd w0, w9, w10, w8
505515
; CHECK-NEXT: ret
506516
%a1 = load i8, ptr %a1_addr
507517
%a2 = load i8, ptr %a2_addr

llvm/test/CodeGen/AArch64/select-constant-xor.ll

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,8 +27,10 @@ define i64 @selecti64i64(i64 %a) {
2727
define i32 @selecti64i32(i64 %a) {
2828
; CHECK-LABEL: selecti64i32:
2929
; CHECK: // %bb.0:
30-
; CHECK-NEXT: asr x8, x0, #63
31-
; CHECK-NEXT: eor w0, w8, #0x7fffffff
30+
; CHECK-NEXT: lsr x9, x0, #63
31+
; CHECK-NEXT: mov w8, #-2147483648 // =0x80000000
32+
; CHECK-NEXT: eor w9, w9, #0x1
33+
; CHECK-NEXT: sub w0, w8, w9
3234
; CHECK-NEXT: ret
3335
%c = icmp sgt i64 %a, -1
3436
%s = select i1 %c, i32 2147483647, i32 -2147483648

0 commit comments

Comments
 (0)