Skip to content

Commit 6485790

Browse files
authored
[NVPTX] Improve lowering of v2i16 logical ops. (#67073)
Bitwise logical ops can always be done as b32, regardless of the availability of other v2i16 ops, which would require a newer GPU.
1 parent 64d1cea commit 6485790

File tree

3 files changed

+78
-4
lines changed

3 files changed

+78
-4
lines changed

llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -642,10 +642,9 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
642642
setI16x2OperationAction(ISD::UREM, MVT::v2i16, Legal, Custom);
643643

644644
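// Other arithmetic, shift and conversion ops are unsupported. Bitwise logic
// ops (and/or/xor) are not listed here because they are done as b32 ops.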
645-
setOperationAction({ISD::AND, ISD::OR, ISD::XOR, ISD::SDIV, ISD::UDIV,
646-
ISD::SRA, ISD::SRL, ISD::MULHS, ISD::MULHU,
647-
ISD::FP_TO_SINT, ISD::FP_TO_UINT, ISD::SINT_TO_FP,
648-
ISD::UINT_TO_FP},
645+
setOperationAction({ISD::SDIV, ISD::UDIV, ISD::SRA, ISD::SRL, ISD::MULHS,
646+
ISD::MULHU, ISD::FP_TO_SINT, ISD::FP_TO_UINT,
647+
ISD::SINT_TO_FP, ISD::UINT_TO_FP},
649648
MVT::v2i16, Expand);
650649

651650
setOperationAction(ISD::ADDC, MVT::i32, Legal);

llvm/lib/Target/NVPTX/NVPTXInstrInfo.td

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1486,6 +1486,17 @@ defm OR : BITWISE<"or", or>;
14861486
defm AND : BITWISE<"and", and>;
14871487
defm XOR : BITWISE<"xor", xor>;
14881488

1489+
// Lower logical ops as bitwise ops on b32.
1490+
// By this point the constants get legalized into a bitcast from i32, so that's
1491+
// what we need to match here.
1492+
def: Pat<(or Int32Regs:$a, (v2i16 (bitconvert (i32 imm:$b)))),
1493+
(ORb32ri Int32Regs:$a, imm:$b)>;
1494+
def: Pat<(xor Int32Regs:$a, (v2i16 (bitconvert (i32 imm:$b)))),
1495+
(XORb32ri Int32Regs:$a, imm:$b)>;
1496+
def: Pat<(and Int32Regs:$a, (v2i16 (bitconvert (i32 imm:$b)))),
1497+
(ANDb32ri Int32Regs:$a, imm:$b)>;
1498+
1499+
14891500
def NOT1 : NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$src),
14901501
"not.pred \t$dst, $src;",
14911502
[(set Int1Regs:$dst, (not Int1Regs:$src))]>;

llvm/test/CodeGen/NVPTX/i16x2-instructions.ll

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -235,6 +235,70 @@ define <2 x i16> @test_mul(<2 x i16> %a, <2 x i16> %b) #0 {
235235
ret <2 x i16> %r
236236
}
237237

238+
;; Logical ops are available on all GPUs as regular 32-bit logical ops
239+
; COMMON-LABEL: test_or(
240+
; COMMON-DAG: ld.param.u32 [[A:%r[0-9]+]], [test_or_param_0];
241+
; COMMON-DAG: ld.param.u32 [[B:%r[0-9]+]], [test_or_param_1];
242+
; COMMON-NEXT: or.b32 [[R:%r[0-9]+]], [[A]], [[B]];
243+
; COMMON-NEXT: st.param.b32 [func_retval0+0], [[R]];
244+
; COMMON-NEXT: ret;
245+
define <2 x i16> @test_or(<2 x i16> %a, <2 x i16> %b) #0 {
246+
%r = or <2 x i16> %a, %b
247+
ret <2 x i16> %r
248+
}
249+
250+
; Check that we can lower or with immediate arguments.
251+
; COMMON-LABEL: test_or_imm_0(
252+
; COMMON-DAG: ld.param.u32 [[A:%r[0-9]+]], [test_or_imm_0_param_0];
253+
; COMMON-NEXT: or.b32 [[R:%r[0-9]+]], [[A]], 131073;
254+
; COMMON-NEXT: st.param.b32 [func_retval0+0], [[R]];
255+
; COMMON-NEXT: ret;
256+
define <2 x i16> @test_or_imm_0(<2 x i16> %a) #0 {
257+
%r = or <2 x i16> <i16 1, i16 2>, %a
258+
ret <2 x i16> %r
259+
}
260+
261+
; COMMON-LABEL: test_or_imm_1(
262+
; COMMON-DAG: ld.param.u32 [[A:%r[0-9]+]], [test_or_imm_1_param_0];
263+
; COMMON-NEXT: or.b32 [[R:%r[0-9]+]], [[A]], 131073;
264+
; COMMON-NEXT: st.param.b32 [func_retval0+0], [[R]];
265+
; COMMON-NEXT: ret;
266+
define <2 x i16> @test_or_imm_1(<2 x i16> %a) #0 {
267+
%r = or <2 x i16> %a, <i16 1, i16 2>
268+
ret <2 x i16> %r
269+
}
270+
271+
; COMMON-LABEL: test_xor(
272+
; COMMON-DAG: ld.param.u32 [[A:%r[0-9]+]], [test_xor_param_0];
273+
; COMMON-DAG: ld.param.u32 [[B:%r[0-9]+]], [test_xor_param_1];
274+
; COMMON-NEXT: xor.b32 [[R:%r[0-9]+]], [[A]], [[B]];
275+
; COMMON-NEXT: st.param.b32 [func_retval0+0], [[R]];
276+
; COMMON-NEXT: ret;
277+
define <2 x i16> @test_xor(<2 x i16> %a, <2 x i16> %b) #0 {
278+
%r = xor <2 x i16> %a, %b
279+
ret <2 x i16> %r
280+
}
281+
282+
; Check that we can lower xor with immediate arguments.
283+
; COMMON-LABEL: test_xor_imm_0(
284+
; COMMON-DAG: ld.param.u32 [[A:%r[0-9]+]], [test_xor_imm_0_param_0];
285+
; COMMON-NEXT: xor.b32 [[R:%r[0-9]+]], [[A]], 131073;
286+
; COMMON-NEXT: st.param.b32 [func_retval0+0], [[R]];
287+
; COMMON-NEXT: ret;
288+
define <2 x i16> @test_xor_imm_0(<2 x i16> %a) #0 {
289+
%r = xor <2 x i16> <i16 1, i16 2>, %a
290+
ret <2 x i16> %r
291+
}
292+
293+
; COMMON-LABEL: test_xor_imm_1(
294+
; COMMON-DAG: ld.param.u32 [[A:%r[0-9]+]], [test_xor_imm_1_param_0];
295+
; COMMON-NEXT: xor.b32 [[R:%r[0-9]+]], [[A]], 131073;
296+
; COMMON-NEXT: st.param.b32 [func_retval0+0], [[R]];
297+
; COMMON-NEXT: ret;
298+
define <2 x i16> @test_xor_imm_1(<2 x i16> %a) #0 {
299+
%r = xor <2 x i16> %a, <i16 1, i16 2>
300+
ret <2 x i16> %r
301+
}
238302

239303
; COMMON-LABEL: .func test_ldst_v2i16(
240304
; COMMON-DAG: ld.param.u64 [[A:%rd[0-9]+]], [test_ldst_v2i16_param_0];

0 commit comments

Comments
 (0)