Skip to content

Commit b428bbd

Browse files
frasercrmck authored and tmsri committed
[NVPTX] Set v2i16 SETCC to Expand (llvm#108969)
Note that this refers to the return type of SETCC. This operation is not legal in PTX, but it was assumed to be legal because v2i16 is declared a legal type. We were already expanding v4i8 SETCC. In certain circumstances the DAGCombiner would try to fold an extension of a v2i1 SETCC (itself illegal, because v2i1 is not a legal type) into a "legal" v2i16 SETCC, for which we have no patterns.
1 parent a5efcb4 commit b428bbd

File tree

2 files changed

+73
-1
lines changed

2 files changed

+73
-1
lines changed

llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -725,7 +725,7 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
725725
// Other arithmetic and logic ops are unsupported.
726726
setOperationAction({ISD::SDIV, ISD::UDIV, ISD::SRA, ISD::SRL, ISD::MULHS,
727727
ISD::MULHU, ISD::FP_TO_SINT, ISD::FP_TO_UINT,
728-
ISD::SINT_TO_FP, ISD::UINT_TO_FP},
728+
ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::SETCC},
729729
MVT::v2i16, Expand);
730730

731731
setOperationAction(ISD::ADDC, MVT::i32, Legal);

llvm/test/CodeGen/NVPTX/sext-setcc.ll

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_80 | FileCheck %s
3+
; RUN: %if ptxas-11.0 %{ llc < %s -march=nvptx64 -mcpu=sm_80 -mattr=+ptx70 | %ptxas-verify -arch=sm_80 %}
4+
5+
define <2 x i16> @sext_setcc_v2i1_to_v2i16(ptr %p) {
6+
; CHECK-LABEL: sext_setcc_v2i1_to_v2i16(
7+
; CHECK: {
8+
; CHECK-NEXT: .reg .pred %p<3>;
9+
; CHECK-NEXT: .reg .b16 %rs<5>;
10+
; CHECK-NEXT: .reg .b32 %r<3>;
11+
; CHECK-NEXT: .reg .b64 %rd<2>;
12+
; CHECK-EMPTY:
13+
; CHECK-NEXT: // %bb.0: // %entry
14+
; CHECK-NEXT: ld.param.u64 %rd1, [sext_setcc_v2i1_to_v2i16_param_0];
15+
; CHECK-NEXT: ld.u32 %r1, [%rd1];
16+
; CHECK-NEXT: mov.b32 {%rs1, %rs2}, %r1;
17+
; CHECK-NEXT: setp.eq.s16 %p1, %rs1, 0;
18+
; CHECK-NEXT: setp.eq.s16 %p2, %rs2, 0;
19+
; CHECK-NEXT: selp.s16 %rs3, -1, 0, %p2;
20+
; CHECK-NEXT: selp.s16 %rs4, -1, 0, %p1;
21+
; CHECK-NEXT: mov.b32 %r2, {%rs4, %rs3};
22+
; CHECK-NEXT: st.param.b32 [func_retval0+0], %r2;
23+
; CHECK-NEXT: ret;
24+
entry:
25+
%v = load <2 x i16>, ptr %p, align 4
26+
%cmp = icmp eq <2 x i16> %v, zeroinitializer
27+
%sext = sext <2 x i1> %cmp to <2 x i16>
28+
ret <2 x i16> %sext
29+
}
30+
31+
define <4 x i8> @sext_setcc_v4i1_to_v4i8(ptr %p) {
32+
; CHECK-LABEL: sext_setcc_v4i1_to_v4i8(
33+
; CHECK: {
34+
; CHECK-NEXT: .reg .pred %p<5>;
35+
; CHECK-NEXT: .reg .b16 %rs<9>;
36+
; CHECK-NEXT: .reg .b32 %r<14>;
37+
; CHECK-NEXT: .reg .b64 %rd<2>;
38+
; CHECK-EMPTY:
39+
; CHECK-NEXT: // %bb.0: // %entry
40+
; CHECK-NEXT: ld.param.u64 %rd1, [sext_setcc_v4i1_to_v4i8_param_0];
41+
; CHECK-NEXT: ld.u32 %r1, [%rd1];
42+
; CHECK-NEXT: bfe.u32 %r2, %r1, 24, 8;
43+
; CHECK-NEXT: cvt.u16.u32 %rs1, %r2;
44+
; CHECK-NEXT: and.b16 %rs2, %rs1, 255;
45+
; CHECK-NEXT: setp.eq.s16 %p1, %rs2, 0;
46+
; CHECK-NEXT: bfe.u32 %r3, %r1, 16, 8;
47+
; CHECK-NEXT: cvt.u16.u32 %rs3, %r3;
48+
; CHECK-NEXT: and.b16 %rs4, %rs3, 255;
49+
; CHECK-NEXT: setp.eq.s16 %p2, %rs4, 0;
50+
; CHECK-NEXT: bfe.u32 %r4, %r1, 8, 8;
51+
; CHECK-NEXT: cvt.u16.u32 %rs5, %r4;
52+
; CHECK-NEXT: and.b16 %rs6, %rs5, 255;
53+
; CHECK-NEXT: setp.eq.s16 %p3, %rs6, 0;
54+
; CHECK-NEXT: bfe.u32 %r5, %r1, 0, 8;
55+
; CHECK-NEXT: cvt.u16.u32 %rs7, %r5;
56+
; CHECK-NEXT: and.b16 %rs8, %rs7, 255;
57+
; CHECK-NEXT: setp.eq.s16 %p4, %rs8, 0;
58+
; CHECK-NEXT: selp.s32 %r6, -1, 0, %p4;
59+
; CHECK-NEXT: selp.s32 %r7, -1, 0, %p3;
60+
; CHECK-NEXT: bfi.b32 %r8, %r7, %r6, 8, 8;
61+
; CHECK-NEXT: selp.s32 %r9, -1, 0, %p2;
62+
; CHECK-NEXT: bfi.b32 %r10, %r9, %r8, 16, 8;
63+
; CHECK-NEXT: selp.s32 %r11, -1, 0, %p1;
64+
; CHECK-NEXT: bfi.b32 %r12, %r11, %r10, 24, 8;
65+
; CHECK-NEXT: st.param.b32 [func_retval0+0], %r12;
66+
; CHECK-NEXT: ret;
67+
entry:
68+
%v = load <4 x i8>, ptr %p, align 4
69+
%cmp = icmp eq <4 x i8> %v, zeroinitializer
70+
%sext = sext <4 x i1> %cmp to <4 x i8>
71+
ret <4 x i8> %sext
72+
}

0 commit comments

Comments
 (0)