Skip to content

Commit 60941f1

Browse files
committed
[NVPTX] Lower v2f16 and v2bf16 stores as 32-bit scalars.
This avoids the unnecessary vector splitting that was needed for the vectorized store instruction. Differential Revision: https://reviews.llvm.org/D152593
1 parent a67208e commit 60941f1

File tree

2 files changed

+5
-2
lines changed

2 files changed

+5
-2
lines changed

llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2465,6 +2465,10 @@ SDValue NVPTXTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
24652465
VT, *Store->getMemOperand()))
24662466
return expandUnalignedStore(Store, DAG);
24672467

2468+
// v2f16 and v2bf16 don't need special handling.
2469+
if (VT == MVT::v2f16 || VT == MVT::v2bf16)
2470+
return SDValue();
2471+
24682472
if (VT.isVector())
24692473
return LowerSTOREVector(Op, DAG);
24702474

llvm/test/CodeGen/NVPTX/f16x2-instructions.ll

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -276,8 +276,7 @@ define <2 x half> @test_frem(<2 x half> %a, <2 x half> %b) #0 {
276276
; CHECK-DAG: ld.param.u64 %[[A:rd[0-9]+]], [test_ldst_v2f16_param_0];
277277
; CHECK-DAG: ld.param.u64 %[[B:rd[0-9]+]], [test_ldst_v2f16_param_1];
278278
; CHECK-DAG: ld.b32 [[E:%r[0-9]+]], [%[[A]]]
279-
; CHECK: mov.b32 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]]}, [[E]];
280-
; CHECK-DAG: st.v2.b16 [%[[B]]], {[[E0]], [[E1]]};
279+
; CHECK-DAG: st.b32 [%[[B]]], [[E]];
281280
; CHECK: ret;
282281
define void @test_ldst_v2f16(ptr %a, ptr %b) {
283282
%t1 = load <2 x half>, ptr %a

0 commit comments

Comments
 (0)