Commit 8c890ea

Revert "[SelectionDAG] Make (a & x) | (~a & y) -> (a & (x ^ y)) ^ y available for all targets" (#143648)
1 parent 841a7f0 commit 8c890ea

18 files changed (+1059 additions, −1500 deletions)
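
For context, the reverted combine rewrote the "masked merge" idiom (a & x) | (~a & y) — take bits of x where a is set, bits of y elsewhere — into the three-instruction form (a & (x ^ y)) ^ y, which avoids a separate and-not. Below is a minimal standalone C++ sketch (not part of the commit; all names are illustrative) that checks the identity:

#include <cassert>
#include <cstdint>

// Masked merge: bits of x where the mask a is 1, bits of y where it is 0.
static uint32_t mergeAndNotForm(uint32_t a, uint32_t x, uint32_t y) {
  return (a & x) | (~a & y); // AND, AND-NOT, OR
}

// The folded form produced by the reverted combine: XOR, AND, XOR.
// Where a is 1: (x ^ y) ^ y == x; where a is 0: 0 ^ y == y.
static uint32_t mergeXorForm(uint32_t a, uint32_t x, uint32_t y) {
  return (a & (x ^ y)) ^ y;
}

int main() {
  // Spot-check the identity on a few values (it holds for all inputs).
  const uint32_t vals[] = {0u, ~0u, 0xDEADBEEFu, 0x0F0F0F0Fu, 1u << 31};
  for (uint32_t a : vals)
    for (uint32_t x : vals)
      for (uint32_t y : vals)
        assert(mergeAndNotForm(a, x, y) == mergeXorForm(a, x, y));
  return 0;
}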

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 0 additions & 57 deletions
@@ -8128,59 +8128,6 @@ static SDValue visitORCommutative(SelectionDAG &DAG, SDValue N0, SDValue N1,
   return SDValue();
 }
 
-static SDValue foldMaskedMergeImpl(SDValue AndL0, SDValue AndR0, SDValue AndL1,
-                                   SDValue AndR1, const SDLoc &DL,
-                                   SelectionDAG &DAG) {
-  if (!isBitwiseNot(AndL0, true) || !AndL0->hasOneUse())
-    return SDValue();
-  SDValue NotOp = AndL0->getOperand(0);
-  if (NotOp == AndR1)
-    std::swap(AndR1, AndL1);
-  if (NotOp != AndL1)
-    return SDValue();
-
-  EVT VT = AndL1->getValueType(0);
-  SDValue Xor0 = DAG.getNode(ISD::XOR, DL, VT, AndR1, AndR0);
-  SDValue And = DAG.getNode(ISD::AND, DL, VT, Xor0, NotOp);
-  SDValue Xor1 = DAG.getNode(ISD::XOR, DL, VT, And, AndR0);
-  return Xor1;
-}
-
-/// Fold "masked merge" expressions like `(m & x) | (~m & y)` into the
-/// equivalent `((x ^ y) & m) ^ y)` pattern.
-/// This is typically a better representation for targets without a fused
-/// "and-not" operation.
-static SDValue foldMaskedMerge(SDNode *Node, SelectionDAG &DAG,
-                               const TargetLowering &TLI, const SDLoc &DL) {
-  // Note that masked-merge variants using XOR or ADD expressions are
-  // normalized to OR by InstCombine so we only check for OR.
-  assert(Node->getOpcode() == ISD::OR && "Must be called with ISD::OR node");
-  SDValue N0 = Node->getOperand(0);
-  if (N0->getOpcode() != ISD::AND || !N0->hasOneUse())
-    return SDValue();
-  SDValue N1 = Node->getOperand(1);
-  if (N1->getOpcode() != ISD::AND || !N1->hasOneUse())
-    return SDValue();
-
-  // If the target supports and-not, don't fold this.
-  if (TLI.hasAndNot(SDValue(Node, 0)))
-    return SDValue();
-
-  SDValue N00 = N0->getOperand(0);
-  SDValue N01 = N0->getOperand(1);
-  SDValue N10 = N1->getOperand(0);
-  SDValue N11 = N1->getOperand(1);
-  if (SDValue Result = foldMaskedMergeImpl(N00, N01, N10, N11, DL, DAG))
-    return Result;
-  if (SDValue Result = foldMaskedMergeImpl(N01, N00, N10, N11, DL, DAG))
-    return Result;
-  if (SDValue Result = foldMaskedMergeImpl(N10, N11, N00, N01, DL, DAG))
-    return Result;
-  if (SDValue Result = foldMaskedMergeImpl(N11, N10, N00, N01, DL, DAG))
-    return Result;
-  return SDValue();
-}
-
 SDValue DAGCombiner::visitOR(SDNode *N) {
   SDValue N0 = N->getOperand(0);
   SDValue N1 = N->getOperand(1);
@@ -8359,10 +8306,6 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
   if (SDValue R = foldLogicTreeOfShifts(N, N0, N1, DAG))
     return R;
 
-  if (VT.isScalarInteger() && VT != MVT::i1)
-    if (SDValue R = foldMaskedMerge(N, DAG, TLI, DL))
-      return R;
-
   return SDValue();
 }
 
llvm/lib/Target/SystemZ/SystemZISelLowering.cpp

Lines changed: 0 additions & 14 deletions
@@ -1283,20 +1283,6 @@ bool SystemZTargetLowering::allowsMisalignedMemoryAccesses(
   return true;
 }
 
-bool SystemZTargetLowering::hasAndNot(SDValue Y) const {
-  EVT VT = Y.getValueType();
-
-  // We can use NC(G)RK for types in GPRs ...
-  if (VT == MVT::i32 || VT == MVT::i64)
-    return Subtarget.hasMiscellaneousExtensions3();
-
-  // ... or VNC for types in VRs.
-  if (VT.isVector() || VT == MVT::i128)
-    return Subtarget.hasVector();
-
-  return false;
-}
-
 // Information about the addressing mode for a memory access.
 struct AddressingMode {
   // True if a long displacement is supported.

llvm/lib/Target/SystemZ/SystemZISelLowering.h

Lines changed: 0 additions & 1 deletion
@@ -671,7 +671,6 @@ class SystemZTargetLowering : public TargetLowering {
   }
 
   unsigned getStackProbeSize(const MachineFunction &MF) const;
-  bool hasAndNot(SDValue Y) const override;
 
 private:
   const SystemZSubtarget &Subtarget;

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 58 additions & 0 deletions
@@ -52350,6 +52350,59 @@ static SDValue combineOrCmpEqZeroToCtlzSrl(SDNode *N, SelectionDAG &DAG,
   return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), N->getValueType(0), Ret);
 }
 
+static SDValue foldMaskedMergeImpl(SDValue And0_L, SDValue And0_R,
+                                   SDValue And1_L, SDValue And1_R,
+                                   const SDLoc &DL, SelectionDAG &DAG) {
+  if (!isBitwiseNot(And0_L, true) || !And0_L->hasOneUse())
+    return SDValue();
+  SDValue NotOp = And0_L->getOperand(0);
+  if (NotOp == And1_R)
+    std::swap(And1_R, And1_L);
+  if (NotOp != And1_L)
+    return SDValue();
+
+  // (~(NotOp) & And0_R) | (NotOp & And1_R)
+  // --> ((And0_R ^ And1_R) & NotOp) ^ And1_R
+  EVT VT = And1_L->getValueType(0);
+  SDValue Freeze_And0_R = DAG.getNode(ISD::FREEZE, SDLoc(), VT, And0_R);
+  SDValue Xor0 = DAG.getNode(ISD::XOR, DL, VT, And1_R, Freeze_And0_R);
+  SDValue And = DAG.getNode(ISD::AND, DL, VT, Xor0, NotOp);
+  SDValue Xor1 = DAG.getNode(ISD::XOR, DL, VT, And, Freeze_And0_R);
+  return Xor1;
+}
+
+/// Fold "masked merge" expressions like `(m & x) | (~m & y)` into the
+/// equivalent `((x ^ y) & m) ^ y)` pattern.
+/// This is typically a better representation for targets without a fused
+/// "and-not" operation. This function is intended to be called from a
+/// `TargetLowering::PerformDAGCombine` callback on `ISD::OR` nodes.
+static SDValue foldMaskedMerge(SDNode *Node, SelectionDAG &DAG) {
+  // Note that masked-merge variants using XOR or ADD expressions are
+  // normalized to OR by InstCombine so we only check for OR.
+  assert(Node->getOpcode() == ISD::OR && "Must be called with ISD::OR node");
+  SDValue N0 = Node->getOperand(0);
+  if (N0->getOpcode() != ISD::AND || !N0->hasOneUse())
+    return SDValue();
+  SDValue N1 = Node->getOperand(1);
+  if (N1->getOpcode() != ISD::AND || !N1->hasOneUse())
+    return SDValue();
+
+  SDLoc DL(Node);
+  SDValue N00 = N0->getOperand(0);
+  SDValue N01 = N0->getOperand(1);
+  SDValue N10 = N1->getOperand(0);
+  SDValue N11 = N1->getOperand(1);
+  if (SDValue Result = foldMaskedMergeImpl(N00, N01, N10, N11, DL, DAG))
+    return Result;
+  if (SDValue Result = foldMaskedMergeImpl(N01, N00, N10, N11, DL, DAG))
+    return Result;
+  if (SDValue Result = foldMaskedMergeImpl(N10, N11, N00, N01, DL, DAG))
+    return Result;
+  if (SDValue Result = foldMaskedMergeImpl(N11, N10, N00, N01, DL, DAG))
+    return Result;
+  return SDValue();
+}
+
 /// If this is an add or subtract where one operand is produced by a cmp+setcc,
 /// then try to convert it to an ADC or SBB. This replaces TEST+SET+{ADD/SUB}
 /// with CMP+{ADC, SBB}.
@@ -52753,6 +52806,11 @@ static SDValue combineOr(SDNode *N, SelectionDAG &DAG,
     }
   }
 
+  // We should fold "masked merge" patterns when `andn` is not available.
+  if (!Subtarget.hasBMI() && VT.isScalarInteger() && VT != MVT::i1)
+    if (SDValue R = foldMaskedMerge(N, DAG))
+      return R;
+
   if (SDValue R = combineOrXorWithSETCC(N->getOpcode(), dl, VT, N0, N1, DAG))
     return R;
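
One difference from the deleted generic version above: the X86 copy freezes And0_R before rewriting. The fold turns a single use of And0_R into two uses, and an unfrozen undef/poison value may yield a different result at each use, which would break the merge semantics. A small standalone C++ sketch (illustrative only, not from the commit) that models the duplicated operand as possibly-distinct values u1/u2:

#include <cassert>
#include <cstdint>

// Folded form with the duplicated operand written as two parameters.
// A frozen operand forces u1 == u2; an unfrozen undef could behave as
// if u1 != u2, picking one value per use.
static uint32_t foldedTwoUses(uint32_t m, uint32_t u1, uint32_t u2,
                              uint32_t y) {
  return ((y ^ u1) & m) ^ u2;
}

int main() {
  const uint32_t m = 0xFF00FF00u, y = 0x12345678u;
  const uint32_t u = 0xAAAAAAAAu, u2 = 0x55555555u;
  // With both uses agreeing (the frozen case) the result is a true
  // masked merge: bits of y where m is 1, bits of u where m is 0.
  assert(foldedTwoUses(m, u, u, y) == ((m & y) | (~m & u)));
  // If the two uses could disagree, bits where m is 1 become
  // y ^ u ^ u2 -- matching neither input -- so the merge is lost.
  assert(foldedTwoUses(m, u, u2, y) != ((m & y) | (~m & u)));
  return 0;
}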

llvm/test/CodeGen/AMDGPU/bfi_int.ll

Lines changed: 15 additions & 15 deletions
@@ -16,9 +16,9 @@ define amdgpu_kernel void @s_bfi_def_i32(ptr addrspace(1) %out, i32 %x, i32 %y,
 ; GFX7-NEXT: s_mov_b32 s7, 0xf000
 ; GFX7-NEXT: s_mov_b32 s6, -1
 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
-; GFX7-NEXT: s_xor_b32 s1, s1, s2
+; GFX7-NEXT: s_andn2_b32 s2, s2, s0
 ; GFX7-NEXT: s_and_b32 s0, s1, s0
-; GFX7-NEXT: s_xor_b32 s0, s0, s2
+; GFX7-NEXT: s_or_b32 s0, s2, s0
 ; GFX7-NEXT: v_mov_b32_e32 v0, s0
 ; GFX7-NEXT: buffer_store_dword v0, off, s[4:7], 0
 ; GFX7-NEXT: s_endpgm
@@ -28,9 +28,9 @@ define amdgpu_kernel void @s_bfi_def_i32(ptr addrspace(1) %out, i32 %x, i32 %y,
 ; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x2c
 ; GFX8-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x24
 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
-; GFX8-NEXT: s_xor_b32 s1, s1, s2
+; GFX8-NEXT: s_andn2_b32 s2, s2, s0
 ; GFX8-NEXT: s_and_b32 s0, s1, s0
-; GFX8-NEXT: s_xor_b32 s0, s0, s2
+; GFX8-NEXT: s_or_b32 s0, s2, s0
 ; GFX8-NEXT: v_mov_b32_e32 v0, s4
 ; GFX8-NEXT: v_mov_b32_e32 v1, s5
 ; GFX8-NEXT: v_mov_b32_e32 v2, s0
@@ -44,9 +44,9 @@ define amdgpu_kernel void @s_bfi_def_i32(ptr addrspace(1) %out, i32 %x, i32 %y,
 ; GFX10-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x24
 ; GFX10-NEXT: v_mov_b32_e32 v0, 0
 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-NEXT: s_xor_b32 s1, s1, s2
+; GFX10-NEXT: s_andn2_b32 s2, s2, s0
 ; GFX10-NEXT: s_and_b32 s0, s1, s0
-; GFX10-NEXT: s_xor_b32 s0, s0, s2
+; GFX10-NEXT: s_or_b32 s0, s2, s0
 ; GFX10-NEXT: v_mov_b32_e32 v1, s0
 ; GFX10-NEXT: global_store_dword v0, v1, s[4:5]
 ; GFX10-NEXT: s_endpgm
@@ -1407,9 +1407,9 @@ define amdgpu_kernel void @s_bitselect_i64_pat_0(i64 %a, i64 %b, i64 %mask) {
 ; GFX7-NEXT: s_mov_b32 s7, 0xf000
 ; GFX7-NEXT: s_mov_b32 s6, -1
 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
-; GFX7-NEXT: s_xor_b64 s[2:3], s[2:3], s[4:5]
-; GFX7-NEXT: s_and_b64 s[0:1], s[2:3], s[0:1]
-; GFX7-NEXT: s_xor_b64 s[0:1], s[0:1], s[4:5]
+; GFX7-NEXT: s_and_b64 s[2:3], s[0:1], s[2:3]
+; GFX7-NEXT: s_andn2_b64 s[0:1], s[4:5], s[0:1]
+; GFX7-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1]
 ; GFX7-NEXT: s_add_u32 s0, s0, 10
 ; GFX7-NEXT: s_addc_u32 s1, s1, 0
 ; GFX7-NEXT: v_mov_b32_e32 v0, s0
@@ -1422,9 +1422,9 @@ define amdgpu_kernel void @s_bitselect_i64_pat_0(i64 %a, i64 %b, i64 %mask) {
 ; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
 ; GFX8-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x34
 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
-; GFX8-NEXT: s_xor_b64 s[2:3], s[2:3], s[4:5]
-; GFX8-NEXT: s_and_b64 s[0:1], s[2:3], s[0:1]
-; GFX8-NEXT: s_xor_b64 s[0:1], s[0:1], s[4:5]
+; GFX8-NEXT: s_and_b64 s[2:3], s[0:1], s[2:3]
+; GFX8-NEXT: s_andn2_b64 s[0:1], s[4:5], s[0:1]
+; GFX8-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1]
 ; GFX8-NEXT: s_add_u32 s0, s0, 10
 ; GFX8-NEXT: s_addc_u32 s1, s1, 0
 ; GFX8-NEXT: v_mov_b32_e32 v0, s0
@@ -1438,9 +1438,9 @@ define amdgpu_kernel void @s_bitselect_i64_pat_0(i64 %a, i64 %b, i64 %mask) {
 ; GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
 ; GFX10-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x34
 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-NEXT: s_xor_b64 s[2:3], s[2:3], s[4:5]
-; GFX10-NEXT: s_and_b64 s[0:1], s[2:3], s[0:1]
-; GFX10-NEXT: s_xor_b64 s[0:1], s[0:1], s[4:5]
+; GFX10-NEXT: s_and_b64 s[2:3], s[0:1], s[2:3]
+; GFX10-NEXT: s_andn2_b64 s[0:1], s[4:5], s[0:1]
+; GFX10-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1]
 ; GFX10-NEXT: s_add_u32 s0, s0, 10
 ; GFX10-NEXT: s_addc_u32 s1, s1, 0
 ; GFX10-NEXT: v_mov_b32_e32 v0, s0

llvm/test/CodeGen/AMDGPU/insert_vector_dynelt.ll

Lines changed: 21 additions & 21 deletions
@@ -289,16 +289,16 @@ entry:
 define amdgpu_kernel void @half4_inselt(ptr addrspace(1) %out, <4 x half> %vec, i32 %sel) {
 ; GCN-LABEL: half4_inselt:
 ; GCN: ; %bb.0: ; %entry
-; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
 ; GCN-NEXT: s_load_dword s6, s[4:5], 0x34
+; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
 ; GCN-NEXT: s_mov_b32 s4, 0x3c003c00
 ; GCN-NEXT: s_mov_b32 s5, s4
 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
-; GCN-NEXT: s_xor_b64 s[4:5], s[2:3], s[4:5]
 ; GCN-NEXT: s_lshl_b32 s6, s6, 4
 ; GCN-NEXT: s_lshl_b64 s[6:7], 0xffff, s6
-; GCN-NEXT: s_and_b64 s[4:5], s[4:5], s[6:7]
-; GCN-NEXT: s_xor_b64 s[2:3], s[4:5], s[2:3]
+; GCN-NEXT: s_andn2_b64 s[2:3], s[2:3], s[6:7]
+; GCN-NEXT: s_and_b64 s[4:5], s[6:7], s[4:5]
+; GCN-NEXT: s_or_b64 s[2:3], s[4:5], s[2:3]
 ; GCN-NEXT: v_mov_b32_e32 v0, s0
 ; GCN-NEXT: v_mov_b32_e32 v2, s2
 ; GCN-NEXT: v_mov_b32_e32 v1, s1
@@ -317,10 +317,10 @@ define amdgpu_kernel void @half2_inselt(ptr addrspace(1) %out, <2 x half> %vec,
 ; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
 ; GCN-NEXT: s_lshl_b32 s3, s3, 4
-; GCN-NEXT: s_xor_b32 s4, s2, 0x3c003c00
 ; GCN-NEXT: s_lshl_b32 s3, 0xffff, s3
-; GCN-NEXT: s_and_b32 s3, s4, s3
-; GCN-NEXT: s_xor_b32 s2, s3, s2
+; GCN-NEXT: s_andn2_b32 s2, s2, s3
+; GCN-NEXT: s_and_b32 s3, s3, 0x3c003c00
+; GCN-NEXT: s_or_b32 s2, s3, s2
 ; GCN-NEXT: v_mov_b32_e32 v0, s0
 ; GCN-NEXT: v_mov_b32_e32 v1, s1
 ; GCN-NEXT: v_mov_b32_e32 v2, s2
@@ -399,10 +399,10 @@ define amdgpu_kernel void @short2_inselt(ptr addrspace(1) %out, <2 x i16> %vec,
 ; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
 ; GCN-NEXT: s_lshl_b32 s3, s3, 4
-; GCN-NEXT: s_xor_b32 s4, s2, 0x10001
 ; GCN-NEXT: s_lshl_b32 s3, 0xffff, s3
-; GCN-NEXT: s_and_b32 s3, s4, s3
-; GCN-NEXT: s_xor_b32 s2, s3, s2
+; GCN-NEXT: s_andn2_b32 s2, s2, s3
+; GCN-NEXT: s_and_b32 s3, s3, 0x10001
+; GCN-NEXT: s_or_b32 s2, s3, s2
 ; GCN-NEXT: v_mov_b32_e32 v0, s0
 ; GCN-NEXT: v_mov_b32_e32 v1, s1
 ; GCN-NEXT: v_mov_b32_e32 v2, s2
@@ -417,16 +417,16 @@ entry:
 define amdgpu_kernel void @short4_inselt(ptr addrspace(1) %out, <4 x i16> %vec, i32 %sel) {
 ; GCN-LABEL: short4_inselt:
 ; GCN: ; %bb.0: ; %entry
-; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
 ; GCN-NEXT: s_load_dword s6, s[4:5], 0x34
+; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
 ; GCN-NEXT: s_mov_b32 s4, 0x10001
 ; GCN-NEXT: s_mov_b32 s5, s4
 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
-; GCN-NEXT: s_xor_b64 s[4:5], s[2:3], s[4:5]
 ; GCN-NEXT: s_lshl_b32 s6, s6, 4
 ; GCN-NEXT: s_lshl_b64 s[6:7], 0xffff, s6
-; GCN-NEXT: s_and_b64 s[4:5], s[4:5], s[6:7]
-; GCN-NEXT: s_xor_b64 s[2:3], s[4:5], s[2:3]
+; GCN-NEXT: s_andn2_b64 s[2:3], s[2:3], s[6:7]
+; GCN-NEXT: s_and_b64 s[4:5], s[6:7], s[4:5]
+; GCN-NEXT: s_or_b64 s[2:3], s[4:5], s[2:3]
 ; GCN-NEXT: v_mov_b32_e32 v0, s0
 ; GCN-NEXT: v_mov_b32_e32 v2, s2
 ; GCN-NEXT: v_mov_b32_e32 v1, s1
@@ -442,15 +442,15 @@ entry:
 define amdgpu_kernel void @byte8_inselt(ptr addrspace(1) %out, <8 x i8> %vec, i32 %sel) {
 ; GCN-LABEL: byte8_inselt:
 ; GCN: ; %bb.0: ; %entry
-; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
 ; GCN-NEXT: s_load_dword s6, s[4:5], 0x34
+; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
-; GCN-NEXT: s_xor_b32 s5, s3, 0x1010101
-; GCN-NEXT: s_lshl_b32 s6, s6, 3
-; GCN-NEXT: s_xor_b32 s4, s2, 0x1010101
-; GCN-NEXT: s_lshl_b64 s[6:7], 0xff, s6
-; GCN-NEXT: s_and_b64 s[4:5], s[4:5], s[6:7]
-; GCN-NEXT: s_xor_b64 s[2:3], s[4:5], s[2:3]
+; GCN-NEXT: s_lshl_b32 s4, s6, 3
+; GCN-NEXT: s_lshl_b64 s[4:5], 0xff, s4
+; GCN-NEXT: s_and_b32 s7, s5, 0x1010101
+; GCN-NEXT: s_and_b32 s6, s4, 0x1010101
+; GCN-NEXT: s_andn2_b64 s[2:3], s[2:3], s[4:5]
+; GCN-NEXT: s_or_b64 s[2:3], s[6:7], s[2:3]
 ; GCN-NEXT: v_mov_b32_e32 v0, s0
 ; GCN-NEXT: v_mov_b32_e32 v2, s2
 ; GCN-NEXT: v_mov_b32_e32 v1, s1
