Skip to content

Commit 25cd249

Browse files
committed
AMDGPU: Don't assert on select of v32i16/v32f16
1 parent 4592709 commit 25cd249

File tree

3 files changed

+1862
-1
lines changed

3 files changed

+1862
-1
lines changed

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -5304,7 +5304,7 @@ SDValue SITargetLowering::splitTernaryVectorOp(SDValue Op,
53045304
assert(VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v8i16 ||
53055305
VT == MVT::v8f16 || VT == MVT::v4f32 || VT == MVT::v16i16 ||
53065306
VT == MVT::v16f16 || VT == MVT::v8f32 || VT == MVT::v16f32 ||
5307-
VT == MVT::v32f32);
5307+
VT == MVT::v32f32 || VT == MVT::v32f16 || VT == MVT::v32i16);
53085308

53095309
SDValue Lo0, Hi0;
53105310
SDValue Op0 = Op.getOperand(0);

llvm/test/CodeGen/AMDGPU/select-vectors.ll

Lines changed: 46 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -152,6 +152,52 @@ define amdgpu_kernel void @v_select_v8i16(ptr addrspace(1) %out, ptr addrspace(1
152152
ret void
153153
}
154154

155+
; GCN-LABEL: {{^}}v_select_v16i16:
156+
; GCN: v_cndmask_b32_e32
157+
; GCN: v_cndmask_b32_e32
158+
; GCN: v_cndmask_b32_e32
159+
; GCN: v_cndmask_b32_e32
160+
; GCN: v_cndmask_b32_e32
161+
; GCN: v_cndmask_b32_e32
162+
; GCN: v_cndmask_b32_e32
163+
; GCN: v_cndmask_b32_e32
164+
; GCN-NOT: cndmask
165+
define amdgpu_kernel void @v_select_v16i16(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, i32 %c) #0 {
166+
%a = load <16 x i16>, ptr addrspace(1) %a.ptr
167+
%b = load <16 x i16>, ptr addrspace(1) %b.ptr
168+
%cmp = icmp eq i32 %c, 0
169+
%select = select i1 %cmp, <16 x i16> %a, <16 x i16> %b
170+
store <16 x i16> %select, ptr addrspace(1) %out, align 4
171+
ret void
172+
}
173+
174+
; GCN-LABEL: {{^}}v_select_v32i16:
175+
; GCN: v_cndmask_b32_e32
176+
; GCN: v_cndmask_b32_e32
177+
; GCN: v_cndmask_b32_e32
178+
; GCN: v_cndmask_b32_e32
179+
; GCN: v_cndmask_b32_e32
180+
; GCN: v_cndmask_b32_e32
181+
; GCN: v_cndmask_b32_e32
182+
; GCN: v_cndmask_b32_e32
183+
; GCN: v_cndmask_b32_e32
184+
; GCN: v_cndmask_b32_e32
185+
; GCN: v_cndmask_b32_e32
186+
; GCN: v_cndmask_b32_e32
187+
; GCN: v_cndmask_b32_e32
188+
; GCN: v_cndmask_b32_e32
189+
; GCN: v_cndmask_b32_e32
190+
; GCN: v_cndmask_b32_e32
191+
; GCN-NOT: cndmask
192+
define amdgpu_kernel void @v_select_v32i16(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, i32 %c) #0 {
193+
%a = load <32 x i16>, ptr addrspace(1) %a.ptr
194+
%b = load <32 x i16>, ptr addrspace(1) %b.ptr
195+
%cmp = icmp eq i32 %c, 0
196+
%select = select i1 %cmp, <32 x i16> %a, <32 x i16> %b
197+
store <32 x i16> %select, ptr addrspace(1) %out, align 4
198+
ret void
199+
}
200+
155201
; FIXME: Expansion with bitwise operations may be better if doing a
156202
; vector select with SGPR inputs.
157203

0 commit comments

Comments
 (0)