Skip to content

Commit 0f8aef9

Browse files
committed
[DAGCombiner] Attempt to fold 'add' nodes to funnel-shift or rotate
1 parent f16c677 commit 0f8aef9

File tree

5 files changed

+77
-143
lines changed

5 files changed

+77
-143
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 43 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -649,14 +649,15 @@ namespace {
649649
bool DemandHighBits = true);
650650
SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
651651
SDValue MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
652-
SDValue InnerPos, SDValue InnerNeg, bool HasPos,
653-
unsigned PosOpcode, unsigned NegOpcode,
654-
const SDLoc &DL);
652+
SDValue InnerPos, SDValue InnerNeg, bool FromAdd,
653+
bool HasPos, unsigned PosOpcode,
654+
unsigned NegOpcode, const SDLoc &DL);
655655
SDValue MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos, SDValue Neg,
656-
SDValue InnerPos, SDValue InnerNeg, bool HasPos,
657-
unsigned PosOpcode, unsigned NegOpcode,
658-
const SDLoc &DL);
659-
SDValue MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
656+
SDValue InnerPos, SDValue InnerNeg, bool FromAdd,
657+
bool HasPos, unsigned PosOpcode,
658+
unsigned NegOpcode, const SDLoc &DL);
659+
SDValue MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL,
660+
bool FromAdd);
660661
SDValue MatchLoadCombine(SDNode *N);
661662
SDValue mergeTruncStores(StoreSDNode *N);
662663
SDValue reduceLoadWidth(SDNode *N);
@@ -2986,6 +2987,9 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
29862987
if (SDValue V = foldAddSubOfSignBit(N, DL, DAG))
29872988
return V;
29882989

2990+
if (SDValue V = MatchRotate(N0, N1, SDLoc(N), /*FromAdd=*/true))
2991+
return V;
2992+
29892993
// Try to match AVGFLOOR fixed-width pattern
29902994
if (SDValue V = foldAddToAvg(N, DL))
29912995
return V;
@@ -8175,7 +8179,7 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
81758179
return V;
81768180

81778181
// See if this is some rotate idiom.
8178-
if (SDValue Rot = MatchRotate(N0, N1, DL))
8182+
if (SDValue Rot = MatchRotate(N0, N1, DL, /*FromAdd=*/false))
81798183
return Rot;
81808184

81818185
if (SDValue Load = MatchLoadCombine(N))
@@ -8364,7 +8368,7 @@ static SDValue extractShiftForRotate(SelectionDAG &DAG, SDValue OppShift,
83648368
// The IsRotate flag should be set when the LHS of both shifts is the same.
83658369
// Otherwise if matching a general funnel shift, it should be clear.
83668370
static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize,
8367-
SelectionDAG &DAG, bool IsRotate) {
8371+
SelectionDAG &DAG, bool IsRotate, bool FromAdd) {
83688372
const auto &TLI = DAG.getTargetLoweringInfo();
83698373
// If EltSize is a power of 2 then:
83708374
//
@@ -8403,7 +8407,7 @@ static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize,
84038407
// NOTE: We can only do this when matching operations which won't modify the
84048408
// least Log2(EltSize) significant bits and not a general funnel shift.
84058409
unsigned MaskLoBits = 0;
8406-
if (IsRotate && isPowerOf2_64(EltSize)) {
8410+
if (IsRotate && !FromAdd && isPowerOf2_64(EltSize)) {
84078411
unsigned Bits = Log2_64(EltSize);
84088412
unsigned NegBits = Neg.getScalarValueSizeInBits();
84098413
if (NegBits >= Bits) {
@@ -8486,9 +8490,9 @@ static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize,
84868490
// Neg with outer conversions stripped away.
84878491
SDValue DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
84888492
SDValue Neg, SDValue InnerPos,
8489-
SDValue InnerNeg, bool HasPos,
8490-
unsigned PosOpcode, unsigned NegOpcode,
8491-
const SDLoc &DL) {
8493+
SDValue InnerNeg, bool FromAdd,
8494+
bool HasPos, unsigned PosOpcode,
8495+
unsigned NegOpcode, const SDLoc &DL) {
84928496
// fold (or (shl x, (*ext y)),
84938497
// (srl x, (*ext (sub 32, y)))) ->
84948498
// (rotl x, y) or (rotr x, (sub 32, y))
@@ -8498,10 +8502,9 @@ SDValue DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
84988502
// (rotr x, y) or (rotl x, (sub 32, y))
84998503
EVT VT = Shifted.getValueType();
85008504
if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits(), DAG,
8501-
/*IsRotate*/ true)) {
8505+
/*IsRotate*/ true, FromAdd))
85028506
return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted,
85038507
HasPos ? Pos : Neg);
8504-
}
85058508

85068509
return SDValue();
85078510
}
@@ -8514,9 +8517,9 @@ SDValue DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
85148517
// TODO: Merge with MatchRotatePosNeg.
85158518
SDValue DAGCombiner::MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos,
85168519
SDValue Neg, SDValue InnerPos,
8517-
SDValue InnerNeg, bool HasPos,
8518-
unsigned PosOpcode, unsigned NegOpcode,
8519-
const SDLoc &DL) {
8520+
SDValue InnerNeg, bool FromAdd,
8521+
bool HasPos, unsigned PosOpcode,
8522+
unsigned NegOpcode, const SDLoc &DL) {
85208523
EVT VT = N0.getValueType();
85218524
unsigned EltBits = VT.getScalarSizeInBits();
85228525

@@ -8527,10 +8530,10 @@ SDValue DAGCombiner::MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos,
85278530
// fold (or (shl x0, (*ext (sub 32, y))),
85288531
// (srl x1, (*ext y))) ->
85298532
// (fshr x0, x1, y) or (fshl x0, x1, (sub 32, y))
8530-
if (matchRotateSub(InnerPos, InnerNeg, EltBits, DAG, /*IsRotate*/ N0 == N1)) {
8533+
if (matchRotateSub(InnerPos, InnerNeg, EltBits, DAG, /*IsRotate*/ N0 == N1,
8534+
FromAdd))
85318535
return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, N0, N1,
85328536
HasPos ? Pos : Neg);
8533-
}
85348537

85358538
// Matching the shift+xor cases, we can't easily use the xor'd shift amount
85368539
// so for now just use the PosOpcode case if it's legal.
@@ -8569,11 +8572,12 @@ SDValue DAGCombiner::MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos,
85698572
return SDValue();
85708573
}
85718574

8572-
// MatchRotate - Handle an 'or' of two operands. If this is one of the many
8573-
// idioms for rotate, and if the target supports rotation instructions, generate
8574-
// a rot[lr]. This also matches funnel shift patterns, similar to rotation but
8575-
// with different shifted sources.
8576-
SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
8575+
// MatchRotate - Handle an 'or' or 'add' of two operands. If this is one of the
8576+
// many idioms for rotate, and if the target supports rotation instructions,
8577+
// generate a rot[lr]. This also matches funnel shift patterns, similar to
8578+
// rotation but with different shifted sources.
8579+
SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL,
8580+
bool FromAdd) {
85778581
EVT VT = LHS.getValueType();
85788582

85798583
// The target must have at least one rotate/funnel flavor.
@@ -8600,9 +8604,9 @@ SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
86008604
if (LHS.getOpcode() == ISD::TRUNCATE && RHS.getOpcode() == ISD::TRUNCATE &&
86018605
LHS.getOperand(0).getValueType() == RHS.getOperand(0).getValueType()) {
86028606
assert(LHS.getValueType() == RHS.getValueType());
8603-
if (SDValue Rot = MatchRotate(LHS.getOperand(0), RHS.getOperand(0), DL)) {
8607+
if (SDValue Rot =
8608+
MatchRotate(LHS.getOperand(0), RHS.getOperand(0), DL, FromAdd))
86048609
return DAG.getNode(ISD::TRUNCATE, SDLoc(LHS), LHS.getValueType(), Rot);
8605-
}
86068610
}
86078611

86088612
// Match "(X shl/srl V1) & V2" where V2 may not be present.
@@ -8782,29 +8786,25 @@ SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
87828786
}
87838787

87848788
if (IsRotate && (HasROTL || HasROTR)) {
8785-
SDValue TryL =
8786-
MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt, LExtOp0,
8787-
RExtOp0, HasROTL, ISD::ROTL, ISD::ROTR, DL);
8788-
if (TryL)
8789+
if (SDValue TryL = MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt,
8790+
LExtOp0, RExtOp0, FromAdd, HasROTL,
8791+
ISD::ROTL, ISD::ROTR, DL))
87898792
return TryL;
87908793

8791-
SDValue TryR =
8792-
MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt, RExtOp0,
8793-
LExtOp0, HasROTR, ISD::ROTR, ISD::ROTL, DL);
8794-
if (TryR)
8794+
if (SDValue TryR = MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt,
8795+
RExtOp0, LExtOp0, FromAdd, HasROTR,
8796+
ISD::ROTR, ISD::ROTL, DL))
87958797
return TryR;
87968798
}
87978799

8798-
SDValue TryL =
8799-
MatchFunnelPosNeg(LHSShiftArg, RHSShiftArg, LHSShiftAmt, RHSShiftAmt,
8800-
LExtOp0, RExtOp0, HasFSHL, ISD::FSHL, ISD::FSHR, DL);
8801-
if (TryL)
8800+
if (SDValue TryL = MatchFunnelPosNeg(LHSShiftArg, RHSShiftArg, LHSShiftAmt,
8801+
RHSShiftAmt, LExtOp0, RExtOp0, FromAdd,
8802+
HasFSHL, ISD::FSHL, ISD::FSHR, DL))
88028803
return TryL;
88038804

8804-
SDValue TryR =
8805-
MatchFunnelPosNeg(LHSShiftArg, RHSShiftArg, RHSShiftAmt, LHSShiftAmt,
8806-
RExtOp0, LExtOp0, HasFSHR, ISD::FSHR, ISD::FSHL, DL);
8807-
if (TryR)
8805+
if (SDValue TryR = MatchFunnelPosNeg(LHSShiftArg, RHSShiftArg, RHSShiftAmt,
8806+
LHSShiftAmt, RExtOp0, LExtOp0, FromAdd,
8807+
HasFSHR, ISD::FSHR, ISD::FSHL, DL))
88088808
return TryR;
88098809

88108810
return SDValue();

llvm/test/CodeGen/AMDGPU/rotate-add.ll

Lines changed: 6 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -44,19 +44,15 @@ define i32 @test_rotl_var(i32 %x, i32 %y) {
4444
; SI-LABEL: test_rotl_var:
4545
; SI: ; %bb.0:
4646
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
47-
; SI-NEXT: v_lshlrev_b32_e32 v2, v1, v0
4847
; SI-NEXT: v_sub_i32_e32 v1, vcc, 32, v1
49-
; SI-NEXT: v_lshrrev_b32_e32 v0, v1, v0
50-
; SI-NEXT: v_add_i32_e32 v0, vcc, v2, v0
48+
; SI-NEXT: v_alignbit_b32 v0, v0, v0, v1
5149
; SI-NEXT: s_setpc_b64 s[30:31]
5250
;
5351
; VI-LABEL: test_rotl_var:
5452
; VI: ; %bb.0:
5553
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
56-
; VI-NEXT: v_lshlrev_b32_e32 v2, v1, v0
5754
; VI-NEXT: v_sub_u32_e32 v1, vcc, 32, v1
58-
; VI-NEXT: v_lshrrev_b32_e32 v0, v1, v0
59-
; VI-NEXT: v_add_u32_e32 v0, vcc, v2, v0
55+
; VI-NEXT: v_alignbit_b32 v0, v0, v0, v1
6056
; VI-NEXT: s_setpc_b64 s[30:31]
6157
%shl = shl i32 %x, %y
6258
%sub = sub i32 32, %y
@@ -69,19 +65,13 @@ define i32 @test_rotr_var(i32 %x, i32 %y) {
6965
; SI-LABEL: test_rotr_var:
7066
; SI: ; %bb.0:
7167
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
72-
; SI-NEXT: v_lshrrev_b32_e32 v2, v1, v0
73-
; SI-NEXT: v_sub_i32_e32 v1, vcc, 32, v1
74-
; SI-NEXT: v_lshlrev_b32_e32 v0, v1, v0
75-
; SI-NEXT: v_add_i32_e32 v0, vcc, v2, v0
68+
; SI-NEXT: v_alignbit_b32 v0, v0, v0, v1
7669
; SI-NEXT: s_setpc_b64 s[30:31]
7770
;
7871
; VI-LABEL: test_rotr_var:
7972
; VI: ; %bb.0:
8073
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
81-
; VI-NEXT: v_lshrrev_b32_e32 v2, v1, v0
82-
; VI-NEXT: v_sub_u32_e32 v1, vcc, 32, v1
83-
; VI-NEXT: v_lshlrev_b32_e32 v0, v1, v0
84-
; VI-NEXT: v_add_u32_e32 v0, vcc, v2, v0
74+
; VI-NEXT: v_alignbit_b32 v0, v0, v0, v1
8575
; VI-NEXT: s_setpc_b64 s[30:31]
8676
%shr = lshr i32 %x, %y
8777
%sub = sub i32 32, %y
@@ -174,21 +164,13 @@ define i32 @test_fshr_special_case(i32 %x0, i32 %x1, i32 %y) {
174164
; SI-LABEL: test_fshr_special_case:
175165
; SI: ; %bb.0:
176166
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
177-
; SI-NEXT: v_lshrrev_b32_e32 v1, v2, v1
178-
; SI-NEXT: v_lshlrev_b32_e32 v0, 1, v0
179-
; SI-NEXT: v_xor_b32_e32 v2, 31, v2
180-
; SI-NEXT: v_lshlrev_b32_e32 v0, v2, v0
181-
; SI-NEXT: v_add_i32_e32 v0, vcc, v1, v0
167+
; SI-NEXT: v_alignbit_b32 v0, v0, v1, v2
182168
; SI-NEXT: s_setpc_b64 s[30:31]
183169
;
184170
; VI-LABEL: test_fshr_special_case:
185171
; VI: ; %bb.0:
186172
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
187-
; VI-NEXT: v_lshrrev_b32_e32 v1, v2, v1
188-
; VI-NEXT: v_lshlrev_b32_e32 v0, 1, v0
189-
; VI-NEXT: v_xor_b32_e32 v2, 31, v2
190-
; VI-NEXT: v_lshlrev_b32_e32 v0, v2, v0
191-
; VI-NEXT: v_add_u32_e32 v0, vcc, v1, v0
173+
; VI-NEXT: v_alignbit_b32 v0, v0, v1, v2
192174
; VI-NEXT: s_setpc_b64 s[30:31]
193175
%shl = lshr i32 %x1, %y
194176
%srli = shl i32 %x0, 1

llvm/test/CodeGen/ARM/rotate-add.ll

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -29,9 +29,8 @@ define i32 @test_simple_rotr(i32 %x) {
2929
define i32 @test_rotl_var(i32 %x, i32 %y) {
3030
; CHECK-LABEL: test_rotl_var:
3131
; CHECK: @ %bb.0:
32-
; CHECK-NEXT: lsl r2, r0, r1
3332
; CHECK-NEXT: rsb r1, r1, #32
34-
; CHECK-NEXT: add r0, r2, r0, lsr r1
33+
; CHECK-NEXT: ror r0, r0, r1
3534
; CHECK-NEXT: bx lr
3635
%shl = shl i32 %x, %y
3736
%sub = sub i32 32, %y
@@ -43,9 +42,7 @@ define i32 @test_rotl_var(i32 %x, i32 %y) {
4342
define i32 @test_rotr_var(i32 %x, i32 %y) {
4443
; CHECK-LABEL: test_rotr_var:
4544
; CHECK: @ %bb.0:
46-
; CHECK-NEXT: lsr r2, r0, r1
47-
; CHECK-NEXT: rsb r1, r1, #32
48-
; CHECK-NEXT: add r0, r2, r0, lsl r1
45+
; CHECK-NEXT: ror r0, r0, r1
4946
; CHECK-NEXT: bx lr
5047
%shr = lshr i32 %x, %y
5148
%sub = sub i32 32, %y

llvm/test/CodeGen/NVPTX/rotate-add.ll

Lines changed: 14 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -39,16 +39,13 @@ define i32 @test_simple_rotr(i32 %x) {
3939
define i32 @test_rotl_var(i32 %x, i32 %y) {
4040
; CHECK-LABEL: test_rotl_var(
4141
; CHECK: {
42-
; CHECK-NEXT: .reg .b32 %r<7>;
42+
; CHECK-NEXT: .reg .b32 %r<4>;
4343
; CHECK-EMPTY:
4444
; CHECK-NEXT: // %bb.0:
4545
; CHECK-NEXT: ld.param.u32 %r1, [test_rotl_var_param_0];
4646
; CHECK-NEXT: ld.param.u32 %r2, [test_rotl_var_param_1];
47-
; CHECK-NEXT: shl.b32 %r3, %r1, %r2;
48-
; CHECK-NEXT: sub.s32 %r4, 32, %r2;
49-
; CHECK-NEXT: shr.u32 %r5, %r1, %r4;
50-
; CHECK-NEXT: add.s32 %r6, %r3, %r5;
51-
; CHECK-NEXT: st.param.b32 [func_retval0], %r6;
47+
; CHECK-NEXT: shf.l.wrap.b32 %r3, %r1, %r1, %r2;
48+
; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
5249
; CHECK-NEXT: ret;
5350
%shl = shl i32 %x, %y
5451
%sub = sub i32 32, %y
@@ -60,16 +57,13 @@ define i32 @test_rotl_var(i32 %x, i32 %y) {
6057
define i32 @test_rotr_var(i32 %x, i32 %y) {
6158
; CHECK-LABEL: test_rotr_var(
6259
; CHECK: {
63-
; CHECK-NEXT: .reg .b32 %r<7>;
60+
; CHECK-NEXT: .reg .b32 %r<4>;
6461
; CHECK-EMPTY:
6562
; CHECK-NEXT: // %bb.0:
6663
; CHECK-NEXT: ld.param.u32 %r1, [test_rotr_var_param_0];
6764
; CHECK-NEXT: ld.param.u32 %r2, [test_rotr_var_param_1];
68-
; CHECK-NEXT: shr.u32 %r3, %r1, %r2;
69-
; CHECK-NEXT: sub.s32 %r4, 32, %r2;
70-
; CHECK-NEXT: shl.b32 %r5, %r1, %r4;
71-
; CHECK-NEXT: add.s32 %r6, %r3, %r5;
72-
; CHECK-NEXT: st.param.b32 [func_retval0], %r6;
65+
; CHECK-NEXT: shf.r.wrap.b32 %r3, %r1, %r1, %r2;
66+
; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
7367
; CHECK-NEXT: ret;
7468
%shr = lshr i32 %x, %y
7569
%sub = sub i32 32, %y
@@ -127,18 +121,14 @@ define i32 @test_invalid_rotr_var_and(i32 %x, i32 %y) {
127121
define i32 @test_fshl_special_case(i32 %x0, i32 %x1, i32 %y) {
128122
; CHECK-LABEL: test_fshl_special_case(
129123
; CHECK: {
130-
; CHECK-NEXT: .reg .b32 %r<9>;
124+
; CHECK-NEXT: .reg .b32 %r<5>;
131125
; CHECK-EMPTY:
132126
; CHECK-NEXT: // %bb.0:
133127
; CHECK-NEXT: ld.param.u32 %r1, [test_fshl_special_case_param_0];
134-
; CHECK-NEXT: ld.param.u32 %r2, [test_fshl_special_case_param_2];
135-
; CHECK-NEXT: shl.b32 %r3, %r1, %r2;
136-
; CHECK-NEXT: ld.param.u32 %r4, [test_fshl_special_case_param_1];
137-
; CHECK-NEXT: shr.u32 %r5, %r4, 1;
138-
; CHECK-NEXT: xor.b32 %r6, %r2, 31;
139-
; CHECK-NEXT: shr.u32 %r7, %r5, %r6;
140-
; CHECK-NEXT: add.s32 %r8, %r3, %r7;
141-
; CHECK-NEXT: st.param.b32 [func_retval0], %r8;
128+
; CHECK-NEXT: ld.param.u32 %r2, [test_fshl_special_case_param_1];
129+
; CHECK-NEXT: ld.param.u32 %r3, [test_fshl_special_case_param_2];
130+
; CHECK-NEXT: shf.l.wrap.b32 %r4, %r2, %r1, %r3;
131+
; CHECK-NEXT: st.param.b32 [func_retval0], %r4;
142132
; CHECK-NEXT: ret;
143133
%shl = shl i32 %x0, %y
144134
%srli = lshr i32 %x1, 1
@@ -151,18 +141,14 @@ define i32 @test_fshl_special_case(i32 %x0, i32 %x1, i32 %y) {
151141
define i32 @test_fshr_special_case(i32 %x0, i32 %x1, i32 %y) {
152142
; CHECK-LABEL: test_fshr_special_case(
153143
; CHECK: {
154-
; CHECK-NEXT: .reg .b32 %r<9>;
144+
; CHECK-NEXT: .reg .b32 %r<5>;
155145
; CHECK-EMPTY:
156146
; CHECK-NEXT: // %bb.0:
157147
; CHECK-NEXT: ld.param.u32 %r1, [test_fshr_special_case_param_0];
158148
; CHECK-NEXT: ld.param.u32 %r2, [test_fshr_special_case_param_1];
159149
; CHECK-NEXT: ld.param.u32 %r3, [test_fshr_special_case_param_2];
160-
; CHECK-NEXT: shr.u32 %r4, %r2, %r3;
161-
; CHECK-NEXT: shl.b32 %r5, %r1, 1;
162-
; CHECK-NEXT: xor.b32 %r6, %r3, 31;
163-
; CHECK-NEXT: shl.b32 %r7, %r5, %r6;
164-
; CHECK-NEXT: add.s32 %r8, %r4, %r7;
165-
; CHECK-NEXT: st.param.b32 [func_retval0], %r8;
150+
; CHECK-NEXT: shf.r.wrap.b32 %r4, %r2, %r1, %r3;
151+
; CHECK-NEXT: st.param.b32 [func_retval0], %r4;
166152
; CHECK-NEXT: ret;
167153
%shl = lshr i32 %x1, %y
168154
%srli = shl i32 %x0, 1

0 commit comments

Comments
 (0)