Commit cd1e9c4

el-evtomtor authored and committed
[SelectionDAG] Make (a & x) | (~a & y) -> (a & (x ^ y)) ^ y available for all targets (llvm#137641)
1 parent 5711990 commit cd1e9c4

18 files changed: +1500 −1059 lines (6 of the 18 files are shown below)

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 57 additions & 0 deletions
@@ -8127,6 +8127,59 @@ static SDValue visitORCommutative(SelectionDAG &DAG, SDValue N0, SDValue N1,
   return SDValue();
 }
 
+static SDValue foldMaskedMergeImpl(SDValue AndL0, SDValue AndR0, SDValue AndL1,
+                                   SDValue AndR1, const SDLoc &DL,
+                                   SelectionDAG &DAG) {
+  if (!isBitwiseNot(AndL0, true) || !AndL0->hasOneUse())
+    return SDValue();
+  SDValue NotOp = AndL0->getOperand(0);
+  if (NotOp == AndR1)
+    std::swap(AndR1, AndL1);
+  if (NotOp != AndL1)
+    return SDValue();
+
+  EVT VT = AndL1->getValueType(0);
+  SDValue Xor0 = DAG.getNode(ISD::XOR, DL, VT, AndR1, AndR0);
+  SDValue And = DAG.getNode(ISD::AND, DL, VT, Xor0, NotOp);
+  SDValue Xor1 = DAG.getNode(ISD::XOR, DL, VT, And, AndR0);
+  return Xor1;
+}
+
+/// Fold "masked merge" expressions like `(m & x) | (~m & y)` into the
+/// equivalent `((x ^ y) & m) ^ y` pattern.
+/// This is typically a better representation for targets without a fused
+/// "and-not" operation.
+static SDValue foldMaskedMerge(SDNode *Node, SelectionDAG &DAG,
+                               const TargetLowering &TLI, const SDLoc &DL) {
+  // Note that masked-merge variants using XOR or ADD expressions are
+  // normalized to OR by InstCombine so we only check for OR.
+  assert(Node->getOpcode() == ISD::OR && "Must be called with ISD::OR node");
+  SDValue N0 = Node->getOperand(0);
+  if (N0->getOpcode() != ISD::AND || !N0->hasOneUse())
+    return SDValue();
+  SDValue N1 = Node->getOperand(1);
+  if (N1->getOpcode() != ISD::AND || !N1->hasOneUse())
+    return SDValue();
+
+  // If the target supports and-not, don't fold this.
+  if (TLI.hasAndNot(SDValue(Node, 0)))
+    return SDValue();
+
+  SDValue N00 = N0->getOperand(0);
+  SDValue N01 = N0->getOperand(1);
+  SDValue N10 = N1->getOperand(0);
+  SDValue N11 = N1->getOperand(1);
+  if (SDValue Result = foldMaskedMergeImpl(N00, N01, N10, N11, DL, DAG))
+    return Result;
+  if (SDValue Result = foldMaskedMergeImpl(N01, N00, N10, N11, DL, DAG))
+    return Result;
+  if (SDValue Result = foldMaskedMergeImpl(N10, N11, N00, N01, DL, DAG))
+    return Result;
+  if (SDValue Result = foldMaskedMergeImpl(N11, N10, N00, N01, DL, DAG))
+    return Result;
+  return SDValue();
+}
+
 SDValue DAGCombiner::visitOR(SDNode *N) {
   SDValue N0 = N->getOperand(0);
   SDValue N1 = N->getOperand(1);

@@ -8305,6 +8358,10 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
   if (SDValue R = foldLogicTreeOfShifts(N, N0, N1, DAG))
     return R;
 
+  if (VT.isScalarInteger() && VT != MVT::i1)
+    if (SDValue R = foldMaskedMerge(N, DAG, TLI, DL))
+      return R;
+
   return SDValue();
 }
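The fold relies on a simple bitwise identity: wherever a bit of m is 1 the result takes the bit of x, and wherever it is 0 the two XORs with y cancel, leaving y. A minimal standalone C++ check of the identity, exhaustive over 8-bit operands (the helper names are illustrative, not part of the patch):

#include <cassert>
#include <cstdint>

// Original "masked merge": bits of x where m is set, bits of y elsewhere.
static uint8_t mergeOr(uint8_t m, uint8_t x, uint8_t y) {
  return (m & x) | (~m & y);
}

// Rewritten form emitted by foldMaskedMergeImpl: XOR, AND, XOR.
static uint8_t mergeXor(uint8_t m, uint8_t x, uint8_t y) {
  return ((x ^ y) & m) ^ y;
}

int main() {
  // Exhaustively verify all 2^24 combinations of (m, x, y).
  for (unsigned m = 0; m < 256; ++m)
    for (unsigned x = 0; x < 256; ++x)
      for (unsigned y = 0; y < 256; ++y)
        assert(mergeOr(m, x, y) == mergeXor(m, x, y));
  return 0;
}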

llvm/lib/Target/SystemZ/SystemZISelLowering.cpp

Lines changed: 14 additions & 0 deletions
@@ -1283,6 +1283,20 @@ bool SystemZTargetLowering::allowsMisalignedMemoryAccesses(
   return true;
 }
 
+bool SystemZTargetLowering::hasAndNot(SDValue Y) const {
+  EVT VT = Y.getValueType();
+
+  // We can use NC(G)RK for types in GPRs ...
+  if (VT == MVT::i32 || VT == MVT::i64)
+    return Subtarget.hasMiscellaneousExtensions3();
+
+  // ... or VNC for types in VRs.
+  if (VT.isVector() || VT == MVT::i128)
+    return Subtarget.hasVector();
+
+  return false;
+}
+
 // Information about the addressing mode for a memory access.
 struct AddressingMode {
   // True if a long displacement is supported.
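The generic fold consults this hook before rewriting, so SystemZ keeps the AND/AND-NOT/OR form whenever a fused and-not is available. A rough sketch of the trade-off, assuming one machine instruction per bitwise operator (function names are illustrative):

#include <cstdint>

// Without and-not, the OR form needs four instructions: NOT, AND, AND, OR.
// With a fused and-not (SystemZ NCGRK here, ANDN on other targets), the
// NOT+AND pair becomes one instruction and the OR form is only three.
uint64_t mergeOrForm(uint64_t m, uint64_t x, uint64_t y) {
  return (m & x) | (~m & y);
}

// The rewritten form is three instructions either way: XOR, AND, XOR.
// That is why the fold only fires when TLI.hasAndNot() returns false.
uint64_t mergeXorForm(uint64_t m, uint64_t x, uint64_t y) {
  return ((x ^ y) & m) ^ y;
}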

llvm/lib/Target/SystemZ/SystemZISelLowering.h

Lines changed: 1 addition & 0 deletions
@@ -671,6 +671,7 @@ class SystemZTargetLowering : public TargetLowering {
   }
 
   unsigned getStackProbeSize(const MachineFunction &MF) const;
+  bool hasAndNot(SDValue Y) const override;
 
 private:
   const SystemZSubtarget &Subtarget;

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 0 additions & 58 deletions
@@ -52292,59 +52292,6 @@ static SDValue combineOrCmpEqZeroToCtlzSrl(SDNode *N, SelectionDAG &DAG,
   return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), N->getValueType(0), Ret);
 }
 
-static SDValue foldMaskedMergeImpl(SDValue And0_L, SDValue And0_R,
-                                   SDValue And1_L, SDValue And1_R,
-                                   const SDLoc &DL, SelectionDAG &DAG) {
-  if (!isBitwiseNot(And0_L, true) || !And0_L->hasOneUse())
-    return SDValue();
-  SDValue NotOp = And0_L->getOperand(0);
-  if (NotOp == And1_R)
-    std::swap(And1_R, And1_L);
-  if (NotOp != And1_L)
-    return SDValue();
-
-  // (~(NotOp) & And0_R) | (NotOp & And1_R)
-  // --> ((And0_R ^ And1_R) & NotOp) ^ And1_R
-  EVT VT = And1_L->getValueType(0);
-  SDValue Freeze_And0_R = DAG.getNode(ISD::FREEZE, SDLoc(), VT, And0_R);
-  SDValue Xor0 = DAG.getNode(ISD::XOR, DL, VT, And1_R, Freeze_And0_R);
-  SDValue And = DAG.getNode(ISD::AND, DL, VT, Xor0, NotOp);
-  SDValue Xor1 = DAG.getNode(ISD::XOR, DL, VT, And, Freeze_And0_R);
-  return Xor1;
-}
-
-/// Fold "masked merge" expressions like `(m & x) | (~m & y)` into the
-/// equivalent `((x ^ y) & m) ^ y` pattern.
-/// This is typically a better representation for targets without a fused
-/// "and-not" operation. This function is intended to be called from a
-/// `TargetLowering::PerformDAGCombine` callback on `ISD::OR` nodes.
-static SDValue foldMaskedMerge(SDNode *Node, SelectionDAG &DAG) {
-  // Note that masked-merge variants using XOR or ADD expressions are
-  // normalized to OR by InstCombine so we only check for OR.
-  assert(Node->getOpcode() == ISD::OR && "Must be called with ISD::OR node");
-  SDValue N0 = Node->getOperand(0);
-  if (N0->getOpcode() != ISD::AND || !N0->hasOneUse())
-    return SDValue();
-  SDValue N1 = Node->getOperand(1);
-  if (N1->getOpcode() != ISD::AND || !N1->hasOneUse())
-    return SDValue();
-
-  SDLoc DL(Node);
-  SDValue N00 = N0->getOperand(0);
-  SDValue N01 = N0->getOperand(1);
-  SDValue N10 = N1->getOperand(0);
-  SDValue N11 = N1->getOperand(1);
-  if (SDValue Result = foldMaskedMergeImpl(N00, N01, N10, N11, DL, DAG))
-    return Result;
-  if (SDValue Result = foldMaskedMergeImpl(N01, N00, N10, N11, DL, DAG))
-    return Result;
-  if (SDValue Result = foldMaskedMergeImpl(N10, N11, N00, N01, DL, DAG))
-    return Result;
-  if (SDValue Result = foldMaskedMergeImpl(N11, N10, N00, N01, DL, DAG))
-    return Result;
-  return SDValue();
-}
-
 /// If this is an add or subtract where one operand is produced by a cmp+setcc,
 /// then try to convert it to an ADC or SBB. This replaces TEST+SET+{ADD/SUB}
 /// with CMP+{ADC, SBB}.

@@ -52748,11 +52695,6 @@ static SDValue combineOr(SDNode *N, SelectionDAG &DAG,
     }
   }
 
-  // We should fold "masked merge" patterns when `andn` is not available.
-  if (!Subtarget.hasBMI() && VT.isScalarInteger() && VT != MVT::i1)
-    if (SDValue R = foldMaskedMerge(N, DAG))
-      return R;
-
   if (SDValue R = combineOrXorWithSETCC(N->getOpcode(), dl, VT, N0, N1, DAG))
     return R;
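The X86-local copy of the fold is deleted in favor of the generic DAGCombiner version; its `!Subtarget.hasBMI()` guard is subsumed by the generic `TLI.hasAndNot` query. One difference worth noting: the deleted version wrapped `And0_R` in an `ISD::FREEZE` node, presumably because the rewritten expression uses that operand twice and freezing keeps both uses observing one consistent value when the operand is poison or undef; the new generic version emits the same shape without the freeze. A sketch of the double use (illustrative C++, not the DAG code):

#include <cstdint>

// Shape of the rewritten expression: y feeds both the inner XOR and the
// outer XOR. The deleted X86 fold froze y at the DAG level so the two
// uses could not diverge for poison/undef inputs.
uint32_t mergeRewritten(uint32_t m, uint32_t x, uint32_t y) {
  uint32_t t = (x ^ y) & m; // first use of y
  return t ^ y;             // second use of y
}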

llvm/test/CodeGen/AMDGPU/bfi_int.ll

Lines changed: 15 additions & 15 deletions
@@ -16,9 +16,9 @@ define amdgpu_kernel void @s_bfi_def_i32(ptr addrspace(1) %out, i32 %x, i32 %y,
 ; GFX7-NEXT:    s_mov_b32 s7, 0xf000
 ; GFX7-NEXT:    s_mov_b32 s6, -1
 ; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX7-NEXT:    s_andn2_b32 s2, s2, s0
+; GFX7-NEXT:    s_xor_b32 s1, s1, s2
 ; GFX7-NEXT:    s_and_b32 s0, s1, s0
-; GFX7-NEXT:    s_or_b32 s0, s2, s0
+; GFX7-NEXT:    s_xor_b32 s0, s0, s2
 ; GFX7-NEXT:    v_mov_b32_e32 v0, s0
 ; GFX7-NEXT:    buffer_store_dword v0, off, s[4:7], 0
 ; GFX7-NEXT:    s_endpgm

@@ -28,9 +28,9 @@ define amdgpu_kernel void @s_bfi_def_i32(ptr addrspace(1) %out, i32 %x, i32 %y,
 ; GFX8-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x2c
 ; GFX8-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x24
 ; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8-NEXT:    s_andn2_b32 s2, s2, s0
+; GFX8-NEXT:    s_xor_b32 s1, s1, s2
 ; GFX8-NEXT:    s_and_b32 s0, s1, s0
-; GFX8-NEXT:    s_or_b32 s0, s2, s0
+; GFX8-NEXT:    s_xor_b32 s0, s0, s2
 ; GFX8-NEXT:    v_mov_b32_e32 v0, s4
 ; GFX8-NEXT:    v_mov_b32_e32 v1, s5
 ; GFX8-NEXT:    v_mov_b32_e32 v2, s0

@@ -44,9 +44,9 @@ define amdgpu_kernel void @s_bfi_def_i32(ptr addrspace(1) %out, i32 %x, i32 %y,
 ; GFX10-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x24
 ; GFX10-NEXT:    v_mov_b32_e32 v0, 0
 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX10-NEXT:    s_andn2_b32 s2, s2, s0
+; GFX10-NEXT:    s_xor_b32 s1, s1, s2
 ; GFX10-NEXT:    s_and_b32 s0, s1, s0
-; GFX10-NEXT:    s_or_b32 s0, s2, s0
+; GFX10-NEXT:    s_xor_b32 s0, s0, s2
 ; GFX10-NEXT:    v_mov_b32_e32 v1, s0
 ; GFX10-NEXT:    global_store_dword v0, v1, s[4:5]
 ; GFX10-NEXT:    s_endpgm

@@ -1407,9 +1407,9 @@ define amdgpu_kernel void @s_bitselect_i64_pat_0(i64 %a, i64 %b, i64 %mask) {
 ; GFX7-NEXT:    s_mov_b32 s7, 0xf000
 ; GFX7-NEXT:    s_mov_b32 s6, -1
 ; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX7-NEXT:    s_and_b64 s[2:3], s[0:1], s[2:3]
-; GFX7-NEXT:    s_andn2_b64 s[0:1], s[4:5], s[0:1]
-; GFX7-NEXT:    s_or_b64 s[0:1], s[2:3], s[0:1]
+; GFX7-NEXT:    s_xor_b64 s[2:3], s[2:3], s[4:5]
+; GFX7-NEXT:    s_and_b64 s[0:1], s[2:3], s[0:1]
+; GFX7-NEXT:    s_xor_b64 s[0:1], s[0:1], s[4:5]
 ; GFX7-NEXT:    s_add_u32 s0, s0, 10
 ; GFX7-NEXT:    s_addc_u32 s1, s1, 0
 ; GFX7-NEXT:    v_mov_b32_e32 v0, s0

@@ -1422,9 +1422,9 @@ define amdgpu_kernel void @s_bitselect_i64_pat_0(i64 %a, i64 %b, i64 %mask) {
 ; GFX8-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
 ; GFX8-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x34
 ; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8-NEXT:    s_and_b64 s[2:3], s[0:1], s[2:3]
-; GFX8-NEXT:    s_andn2_b64 s[0:1], s[4:5], s[0:1]
-; GFX8-NEXT:    s_or_b64 s[0:1], s[2:3], s[0:1]
+; GFX8-NEXT:    s_xor_b64 s[2:3], s[2:3], s[4:5]
+; GFX8-NEXT:    s_and_b64 s[0:1], s[2:3], s[0:1]
+; GFX8-NEXT:    s_xor_b64 s[0:1], s[0:1], s[4:5]
 ; GFX8-NEXT:    s_add_u32 s0, s0, 10
 ; GFX8-NEXT:    s_addc_u32 s1, s1, 0
 ; GFX8-NEXT:    v_mov_b32_e32 v0, s0

@@ -1438,9 +1438,9 @@ define amdgpu_kernel void @s_bitselect_i64_pat_0(i64 %a, i64 %b, i64 %mask) {
 ; GFX10-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
 ; GFX10-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x34
 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX10-NEXT:    s_and_b64 s[2:3], s[0:1], s[2:3]
-; GFX10-NEXT:    s_andn2_b64 s[0:1], s[4:5], s[0:1]
-; GFX10-NEXT:    s_or_b64 s[0:1], s[2:3], s[0:1]
+; GFX10-NEXT:    s_xor_b64 s[2:3], s[2:3], s[4:5]
+; GFX10-NEXT:    s_and_b64 s[0:1], s[2:3], s[0:1]
+; GFX10-NEXT:    s_xor_b64 s[0:1], s[0:1], s[4:5]
 ; GFX10-NEXT:    s_add_u32 s0, s0, 10
 ; GFX10-NEXT:    s_addc_u32 s1, s1, 0
 ; GFX10-NEXT:    v_mov_b32_e32 v0, s0

llvm/test/CodeGen/AMDGPU/insert_vector_dynelt.ll

Lines changed: 21 additions & 21 deletions
@@ -289,16 +289,16 @@ entry:
 define amdgpu_kernel void @half4_inselt(ptr addrspace(1) %out, <4 x half> %vec, i32 %sel) {
 ; GCN-LABEL: half4_inselt:
 ; GCN:       ; %bb.0: ; %entry
-; GCN-NEXT:    s_load_dword s6, s[4:5], 0x34
 ; GCN-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GCN-NEXT:    s_load_dword s6, s[4:5], 0x34
 ; GCN-NEXT:    s_mov_b32 s4, 0x3c003c00
 ; GCN-NEXT:    s_mov_b32 s5, s4
 ; GCN-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN-NEXT:    s_xor_b64 s[4:5], s[2:3], s[4:5]
 ; GCN-NEXT:    s_lshl_b32 s6, s6, 4
 ; GCN-NEXT:    s_lshl_b64 s[6:7], 0xffff, s6
-; GCN-NEXT:    s_andn2_b64 s[2:3], s[2:3], s[6:7]
-; GCN-NEXT:    s_and_b64 s[4:5], s[6:7], s[4:5]
-; GCN-NEXT:    s_or_b64 s[2:3], s[4:5], s[2:3]
+; GCN-NEXT:    s_and_b64 s[4:5], s[4:5], s[6:7]
+; GCN-NEXT:    s_xor_b64 s[2:3], s[4:5], s[2:3]
 ; GCN-NEXT:    v_mov_b32_e32 v0, s0
 ; GCN-NEXT:    v_mov_b32_e32 v2, s2
 ; GCN-NEXT:    v_mov_b32_e32 v1, s1

@@ -317,10 +317,10 @@ define amdgpu_kernel void @half2_inselt(ptr addrspace(1) %out, <2 x half> %vec,
 ; GCN-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
 ; GCN-NEXT:    s_waitcnt lgkmcnt(0)
 ; GCN-NEXT:    s_lshl_b32 s3, s3, 4
+; GCN-NEXT:    s_xor_b32 s4, s2, 0x3c003c00
 ; GCN-NEXT:    s_lshl_b32 s3, 0xffff, s3
-; GCN-NEXT:    s_andn2_b32 s2, s2, s3
-; GCN-NEXT:    s_and_b32 s3, s3, 0x3c003c00
-; GCN-NEXT:    s_or_b32 s2, s3, s2
+; GCN-NEXT:    s_and_b32 s3, s4, s3
+; GCN-NEXT:    s_xor_b32 s2, s3, s2
 ; GCN-NEXT:    v_mov_b32_e32 v0, s0
 ; GCN-NEXT:    v_mov_b32_e32 v1, s1
 ; GCN-NEXT:    v_mov_b32_e32 v2, s2

@@ -399,10 +399,10 @@ define amdgpu_kernel void @short2_inselt(ptr addrspace(1) %out, <2 x i16> %vec,
 ; GCN-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
 ; GCN-NEXT:    s_waitcnt lgkmcnt(0)
 ; GCN-NEXT:    s_lshl_b32 s3, s3, 4
+; GCN-NEXT:    s_xor_b32 s4, s2, 0x10001
 ; GCN-NEXT:    s_lshl_b32 s3, 0xffff, s3
-; GCN-NEXT:    s_andn2_b32 s2, s2, s3
-; GCN-NEXT:    s_and_b32 s3, s3, 0x10001
-; GCN-NEXT:    s_or_b32 s2, s3, s2
+; GCN-NEXT:    s_and_b32 s3, s4, s3
+; GCN-NEXT:    s_xor_b32 s2, s3, s2
 ; GCN-NEXT:    v_mov_b32_e32 v0, s0
 ; GCN-NEXT:    v_mov_b32_e32 v1, s1
 ; GCN-NEXT:    v_mov_b32_e32 v2, s2

@@ -417,16 +417,16 @@ entry:
 define amdgpu_kernel void @short4_inselt(ptr addrspace(1) %out, <4 x i16> %vec, i32 %sel) {
 ; GCN-LABEL: short4_inselt:
 ; GCN:       ; %bb.0: ; %entry
-; GCN-NEXT:    s_load_dword s6, s[4:5], 0x34
 ; GCN-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GCN-NEXT:    s_load_dword s6, s[4:5], 0x34
 ; GCN-NEXT:    s_mov_b32 s4, 0x10001
 ; GCN-NEXT:    s_mov_b32 s5, s4
 ; GCN-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN-NEXT:    s_xor_b64 s[4:5], s[2:3], s[4:5]
 ; GCN-NEXT:    s_lshl_b32 s6, s6, 4
 ; GCN-NEXT:    s_lshl_b64 s[6:7], 0xffff, s6
-; GCN-NEXT:    s_andn2_b64 s[2:3], s[2:3], s[6:7]
-; GCN-NEXT:    s_and_b64 s[4:5], s[6:7], s[4:5]
-; GCN-NEXT:    s_or_b64 s[2:3], s[4:5], s[2:3]
+; GCN-NEXT:    s_and_b64 s[4:5], s[4:5], s[6:7]
+; GCN-NEXT:    s_xor_b64 s[2:3], s[4:5], s[2:3]
 ; GCN-NEXT:    v_mov_b32_e32 v0, s0
 ; GCN-NEXT:    v_mov_b32_e32 v2, s2
 ; GCN-NEXT:    v_mov_b32_e32 v1, s1

@@ -442,15 +442,15 @@ entry:
 define amdgpu_kernel void @byte8_inselt(ptr addrspace(1) %out, <8 x i8> %vec, i32 %sel) {
 ; GCN-LABEL: byte8_inselt:
 ; GCN:       ; %bb.0: ; %entry
-; GCN-NEXT:    s_load_dword s6, s[4:5], 0x34
 ; GCN-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GCN-NEXT:    s_load_dword s6, s[4:5], 0x34
 ; GCN-NEXT:    s_waitcnt lgkmcnt(0)
-; GCN-NEXT:    s_lshl_b32 s4, s6, 3
-; GCN-NEXT:    s_lshl_b64 s[4:5], 0xff, s4
-; GCN-NEXT:    s_and_b32 s7, s5, 0x1010101
-; GCN-NEXT:    s_and_b32 s6, s4, 0x1010101
-; GCN-NEXT:    s_andn2_b64 s[2:3], s[2:3], s[4:5]
-; GCN-NEXT:    s_or_b64 s[2:3], s[6:7], s[2:3]
+; GCN-NEXT:    s_xor_b32 s5, s3, 0x1010101
+; GCN-NEXT:    s_lshl_b32 s6, s6, 3
+; GCN-NEXT:    s_xor_b32 s4, s2, 0x1010101
+; GCN-NEXT:    s_lshl_b64 s[6:7], 0xff, s6
+; GCN-NEXT:    s_and_b64 s[4:5], s[4:5], s[6:7]
+; GCN-NEXT:    s_xor_b64 s[2:3], s[4:5], s[2:3]
 ; GCN-NEXT:    v_mov_b32_e32 v0, s0
 ; GCN-NEXT:    v_mov_b32_e32 v2, s2
 ; GCN-NEXT:    v_mov_b32_e32 v1, s1
