Skip to content

Commit 2e77aea

Browse files
authored
[RISCV] Give up on correct undef semantics in mul strength reduction (#90097)
This is a change I really don't like posting, but I think we're out of other options. As can be seen in the test differences, we have cases where adding the freeze inhibits real optimizations. Given no other target handles the undef semantics correctly here, I think the practical answer is that we shouldn't either. Yuck. As examples, consider combineMulSpecial in X86 and performMulCombine in AArch64. The only other real option I see here is to move all of the strength reduction code out of ISEL. We could do this either via tablegen rules, or as an MI pass, but other than shifting the point where we ignore undef semantics, I don't think this is meaningfully different. Note that the particular tests included here would be fixed if we added SHA/SHL to canCreateUndefOrPoison. However, a) that's already been tried twice and exposes its own set of regressions, and b) these are simply examples. You can create many alternate examples.
1 parent 3dcd2cc commit 2e77aea

File tree

2 files changed

+130
-8
lines changed

2 files changed

+130
-8
lines changed

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -13416,6 +13416,12 @@ static SDValue expandMul(SDNode *N, SelectionDAG &DAG,
1341613416
return SDValue();
1341713417
uint64_t MulAmt = CNode->getZExtValue();
1341813418

13419+
// WARNING: The code below is knowingly incorrect with regards to undef semantics.
13420+
// We're adding additional uses of X here, and in principle, we should be freezing
13421+
// X before doing so. However, adding freeze here causes real regressions, and no
13422+
// other target properly freezes X in these cases either.
13423+
SDValue X = N->getOperand(0);
13424+
1341913425
for (uint64_t Divisor : {3, 5, 9}) {
1342013426
if (MulAmt % Divisor != 0)
1342113427
continue;
@@ -13428,7 +13434,6 @@ static SDValue expandMul(SDNode *N, SelectionDAG &DAG,
1342813434
// 3/5/9 * 3/5/9 -> shXadd (shYadd X, X), (shYadd X, X)
1342913435
if (MulAmt2 == 3 || MulAmt2 == 5 || MulAmt2 == 9) {
1343013436
SDLoc DL(N);
13431-
SDValue X = DAG.getFreeze(N->getOperand(0));
1343213437
SDValue Mul359 =
1343313438
DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
1343413439
DAG.getConstant(Log2_64(Divisor - 1), DL, VT), X);
@@ -13446,7 +13451,6 @@ static SDValue expandMul(SDNode *N, SelectionDAG &DAG,
1344613451
if (ScaleShift >= 1 && ScaleShift < 4) {
1344713452
unsigned ShiftAmt = Log2_64((MulAmt & (MulAmt - 1)));
1344813453
SDLoc DL(N);
13449-
SDValue X = DAG.getFreeze(N->getOperand(0));
1345013454
SDValue Shift1 =
1345113455
DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(ShiftAmt, DL, VT));
1345213456
return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
@@ -13466,7 +13470,6 @@ static SDValue expandMul(SDNode *N, SelectionDAG &DAG,
1346613470
unsigned TZ = llvm::countr_zero(C);
1346713471
if ((C >> TZ) == Divisor && (TZ == 1 || TZ == 2 || TZ == 3)) {
1346813472
SDLoc DL(N);
13469-
SDValue X = DAG.getFreeze(N->getOperand(0));
1347013473
SDValue Mul359 =
1347113474
DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
1347213475
DAG.getConstant(Log2_64(Divisor - 1), DL, VT), X);
@@ -13481,7 +13484,6 @@ static SDValue expandMul(SDNode *N, SelectionDAG &DAG,
1348113484
if (ScaleShift >= 1 && ScaleShift < 4) {
1348213485
unsigned ShiftAmt = Log2_64(((MulAmt - 1) & (MulAmt - 2)));
1348313486
SDLoc DL(N);
13484-
SDValue X = DAG.getFreeze(N->getOperand(0));
1348513487
SDValue Shift1 =
1348613488
DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(ShiftAmt, DL, VT));
1348713489
return DAG.getNode(ISD::ADD, DL, VT, Shift1,
@@ -13495,11 +13497,11 @@ static SDValue expandMul(SDNode *N, SelectionDAG &DAG,
1349513497
if (isPowerOf2_64(MulAmt + Offset)) {
1349613498
SDLoc DL(N);
1349713499
SDValue Shift1 =
13498-
DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
13500+
DAG.getNode(ISD::SHL, DL, VT, X,
1349913501
DAG.getConstant(Log2_64(MulAmt + Offset), DL, VT));
13500-
SDValue Mul359 = DAG.getNode(RISCVISD::SHL_ADD, DL, VT, N->getOperand(0),
13502+
SDValue Mul359 = DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
1350113503
DAG.getConstant(Log2_64(Offset - 1), DL, VT),
13502-
N->getOperand(0));
13504+
X);
1350313505
return DAG.getNode(ISD::SUB, DL, VT, Shift1, Mul359);
1350413506
}
1350513507
}

llvm/test/CodeGen/RISCV/rv64zba.ll

Lines changed: 121 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,9 @@
44
; RUN: llc -mtriple=riscv64 -mattr=+m,+zba -verify-machineinstrs < %s \
55
; RUN: | FileCheck %s -check-prefixes=CHECK,RV64ZBA,RV64ZBANOZBB
66
; RUN: llc -mtriple=riscv64 -mattr=+m,+zba,+zbb -verify-machineinstrs < %s \
7-
; RUN: | FileCheck %s -check-prefixes=CHECK,RV64ZBA,RV64ZBAZBB
7+
; RUN: | FileCheck %s -check-prefixes=CHECK,RV64ZBA,RV64ZBAZBB,RV64ZBAZBBNOZBS
8+
; RUN: llc -mtriple=riscv64 -mattr=+m,+zba,+zbb,+zbs -verify-machineinstrs < %s \
9+
; RUN: | FileCheck %s -check-prefixes=CHECK,RV64ZBA,RV64ZBAZBB,RV64ZBAZBBZBS
810

911
define i64 @slliuw(i64 %a) nounwind {
1012
; RV64I-LABEL: slliuw:
@@ -2733,3 +2735,121 @@ define i64 @mul_neg8(i64 %a) {
27332735
%c = mul i64 %a, -8
27342736
ret i64 %c
27352737
}
2738+
2739+
define i64 @bext_mul12(i32 %1, i32 %2) {
2740+
; RV64I-LABEL: bext_mul12:
2741+
; RV64I: # %bb.0: # %entry
2742+
; RV64I-NEXT: srlw a0, a0, a1
2743+
; RV64I-NEXT: andi a0, a0, 1
2744+
; RV64I-NEXT: li a1, 12
2745+
; RV64I-NEXT: mul a0, a0, a1
2746+
; RV64I-NEXT: ret
2747+
;
2748+
; RV64ZBANOZBB-LABEL: bext_mul12:
2749+
; RV64ZBANOZBB: # %bb.0: # %entry
2750+
; RV64ZBANOZBB-NEXT: srlw a0, a0, a1
2751+
; RV64ZBANOZBB-NEXT: andi a0, a0, 1
2752+
; RV64ZBANOZBB-NEXT: sh1add a0, a0, a0
2753+
; RV64ZBANOZBB-NEXT: slli a0, a0, 2
2754+
; RV64ZBANOZBB-NEXT: ret
2755+
;
2756+
; RV64ZBAZBBNOZBS-LABEL: bext_mul12:
2757+
; RV64ZBAZBBNOZBS: # %bb.0: # %entry
2758+
; RV64ZBAZBBNOZBS-NEXT: srlw a0, a0, a1
2759+
; RV64ZBAZBBNOZBS-NEXT: andi a0, a0, 1
2760+
; RV64ZBAZBBNOZBS-NEXT: sh1add a0, a0, a0
2761+
; RV64ZBAZBBNOZBS-NEXT: slli a0, a0, 2
2762+
; RV64ZBAZBBNOZBS-NEXT: ret
2763+
;
2764+
; RV64ZBAZBBZBS-LABEL: bext_mul12:
2765+
; RV64ZBAZBBZBS: # %bb.0: # %entry
2766+
; RV64ZBAZBBZBS-NEXT: bext a0, a0, a1
2767+
; RV64ZBAZBBZBS-NEXT: sh1add a0, a0, a0
2768+
; RV64ZBAZBBZBS-NEXT: slli a0, a0, 2
2769+
; RV64ZBAZBBZBS-NEXT: ret
2770+
entry:
2771+
%3 = lshr i32 %1, %2
2772+
%4 = and i32 %3, 1
2773+
%5 = zext nneg i32 %4 to i64
2774+
%6 = mul i64 %5, 12
2775+
ret i64 %6
2776+
}
2777+
2778+
define i64 @bext_mul45(i32 %1, i32 %2) {
2779+
; RV64I-LABEL: bext_mul45:
2780+
; RV64I: # %bb.0: # %entry
2781+
; RV64I-NEXT: srlw a0, a0, a1
2782+
; RV64I-NEXT: andi a0, a0, 1
2783+
; RV64I-NEXT: li a1, 45
2784+
; RV64I-NEXT: mul a0, a0, a1
2785+
; RV64I-NEXT: ret
2786+
;
2787+
; RV64ZBANOZBB-LABEL: bext_mul45:
2788+
; RV64ZBANOZBB: # %bb.0: # %entry
2789+
; RV64ZBANOZBB-NEXT: srlw a0, a0, a1
2790+
; RV64ZBANOZBB-NEXT: andi a0, a0, 1
2791+
; RV64ZBANOZBB-NEXT: sh2add a0, a0, a0
2792+
; RV64ZBANOZBB-NEXT: sh3add a0, a0, a0
2793+
; RV64ZBANOZBB-NEXT: ret
2794+
;
2795+
; RV64ZBAZBBNOZBS-LABEL: bext_mul45:
2796+
; RV64ZBAZBBNOZBS: # %bb.0: # %entry
2797+
; RV64ZBAZBBNOZBS-NEXT: srlw a0, a0, a1
2798+
; RV64ZBAZBBNOZBS-NEXT: andi a0, a0, 1
2799+
; RV64ZBAZBBNOZBS-NEXT: sh2add a0, a0, a0
2800+
; RV64ZBAZBBNOZBS-NEXT: sh3add a0, a0, a0
2801+
; RV64ZBAZBBNOZBS-NEXT: ret
2802+
;
2803+
; RV64ZBAZBBZBS-LABEL: bext_mul45:
2804+
; RV64ZBAZBBZBS: # %bb.0: # %entry
2805+
; RV64ZBAZBBZBS-NEXT: bext a0, a0, a1
2806+
; RV64ZBAZBBZBS-NEXT: sh2add a0, a0, a0
2807+
; RV64ZBAZBBZBS-NEXT: sh3add a0, a0, a0
2808+
; RV64ZBAZBBZBS-NEXT: ret
2809+
entry:
2810+
%3 = lshr i32 %1, %2
2811+
%4 = and i32 %3, 1
2812+
%5 = zext nneg i32 %4 to i64
2813+
%6 = mul i64 %5, 45
2814+
ret i64 %6
2815+
}
2816+
2817+
define i64 @bext_mul132(i32 %1, i32 %2) {
2818+
; RV64I-LABEL: bext_mul132:
2819+
; RV64I: # %bb.0: # %entry
2820+
; RV64I-NEXT: srlw a0, a0, a1
2821+
; RV64I-NEXT: andi a0, a0, 1
2822+
; RV64I-NEXT: li a1, 132
2823+
; RV64I-NEXT: mul a0, a0, a1
2824+
; RV64I-NEXT: ret
2825+
;
2826+
; RV64ZBANOZBB-LABEL: bext_mul132:
2827+
; RV64ZBANOZBB: # %bb.0: # %entry
2828+
; RV64ZBANOZBB-NEXT: srlw a0, a0, a1
2829+
; RV64ZBANOZBB-NEXT: andi a0, a0, 1
2830+
; RV64ZBANOZBB-NEXT: slli a1, a0, 7
2831+
; RV64ZBANOZBB-NEXT: sh2add a0, a0, a1
2832+
; RV64ZBANOZBB-NEXT: ret
2833+
;
2834+
; RV64ZBAZBBNOZBS-LABEL: bext_mul132:
2835+
; RV64ZBAZBBNOZBS: # %bb.0: # %entry
2836+
; RV64ZBAZBBNOZBS-NEXT: srlw a0, a0, a1
2837+
; RV64ZBAZBBNOZBS-NEXT: andi a0, a0, 1
2838+
; RV64ZBAZBBNOZBS-NEXT: slli a1, a0, 7
2839+
; RV64ZBAZBBNOZBS-NEXT: sh2add a0, a0, a1
2840+
; RV64ZBAZBBNOZBS-NEXT: ret
2841+
;
2842+
; RV64ZBAZBBZBS-LABEL: bext_mul132:
2843+
; RV64ZBAZBBZBS: # %bb.0: # %entry
2844+
; RV64ZBAZBBZBS-NEXT: bext a0, a0, a1
2845+
; RV64ZBAZBBZBS-NEXT: slli a1, a0, 7
2846+
; RV64ZBAZBBZBS-NEXT: sh2add a0, a0, a1
2847+
; RV64ZBAZBBZBS-NEXT: ret
2848+
entry:
2849+
%3 = lshr i32 %1, %2
2850+
%4 = and i32 %3, 1
2851+
%5 = zext nneg i32 %4 to i64
2852+
%6 = mul i64 %5, 132
2853+
ret i64 %6
2854+
}
2855+

0 commit comments

Comments
 (0)