Skip to content

Commit 627f6d8

Browse files
While lowering a return, introduce a readfirstlane to copy the intermediate result to the output register
1 parent ccbbb17 commit 627f6d8

File tree

2 files changed

+25
-1
lines changed

2 files changed

+25
-1
lines changed

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3251,6 +3251,8 @@ SITargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
32513251
SmallVector<SDValue, 48> RetOps;
32523252
RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
32533253

3254+
SDValue ReadFirstLane =
3255+
DAG.getTargetConstant(Intrinsic::amdgcn_readfirstlane, DL, MVT::i32);
32543256
// Copy the result values into the output registers.
32553257
for (unsigned I = 0, RealRVLocIdx = 0, E = RVLocs.size(); I != E;
32563258
++I, ++RealRVLocIdx) {
@@ -3278,7 +3280,8 @@ SITargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
32783280
default:
32793281
llvm_unreachable("Unknown loc info!");
32803282
}
3281-
3283+
Arg = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, Arg.getValueType(),
3284+
ReadFirstLane, Arg);
32823285
Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Arg, Glue);
32833286
Glue = Chain.getValue(1);
32843287
RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs -stop-after=finalize-isel | FileCheck %s -check-prefixes=GFX11
3+
4+
define amdgpu_ps i32 @s_copysign_f32_bf16(float inreg %mag, bfloat inreg %sign.bf16) {
5+
; GFX11-LABEL: name: s_copysign_f32_bf16
6+
; GFX11: bb.0 (%ir-block.0):
7+
; GFX11-NEXT: liveins: $sgpr0, $sgpr1
8+
; GFX11-NEXT: {{ $}}
9+
; GFX11-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr1
10+
; GFX11-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr0
11+
; GFX11-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 16, [[COPY]], implicit $exec
12+
; GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647
13+
; GFX11-NEXT: [[V_BFI_B32_e64_:%[0-9]+]]:vgpr_32 = V_BFI_B32_e64 killed [[S_MOV_B32_]], [[COPY1]], killed [[V_LSHLREV_B32_e64_]], implicit $exec
14+
; GFX11-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[V_BFI_B32_e64_]], implicit $exec
15+
; GFX11-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
16+
; GFX11-NEXT: SI_RETURN_TO_EPILOG $sgpr0
17+
%sign = fpext bfloat %sign.bf16 to float
18+
%op = call float @llvm.copysign.f32(float %mag, float %sign)
19+
%cast = bitcast float %op to i32
20+
ret i32 %cast
21+
}

0 commit comments

Comments
 (0)