Skip to content

Commit f7c3627

Browse files
authored
DAG: Implement promotion for strict_fpextend (llvm#74310)
The test is a placeholder; it will be merged into the existing test once additional bugs affecting illegal f16 targets are fixed.
1 parent 625197d commit f7c3627

File tree

3 files changed

+67
-0
lines changed

3 files changed

+67
-0
lines changed

llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2214,6 +2214,9 @@ bool DAGTypeLegalizer::PromoteFloatOperand(SDNode *N, unsigned OpNo) {
22142214
case ISD::FP_TO_UINT_SAT:
22152215
R = PromoteFloatOp_FP_TO_XINT_SAT(N, OpNo); break;
22162216
case ISD::FP_EXTEND: R = PromoteFloatOp_FP_EXTEND(N, OpNo); break;
2217+
case ISD::STRICT_FP_EXTEND:
2218+
R = PromoteFloatOp_STRICT_FP_EXTEND(N, OpNo);
2219+
break;
22172220
case ISD::SELECT_CC: R = PromoteFloatOp_SELECT_CC(N, OpNo); break;
22182221
case ISD::SETCC: R = PromoteFloatOp_SETCC(N, OpNo); break;
22192222
case ISD::STORE: R = PromoteFloatOp_STORE(N, OpNo); break;
@@ -2276,6 +2279,26 @@ SDValue DAGTypeLegalizer::PromoteFloatOp_FP_EXTEND(SDNode *N, unsigned OpNo) {
22762279
return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, Op);
22772280
}
22782281

2282+
// Promote the floating-point source operand of a STRICT_FP_EXTEND node.
//
// N is the STRICT_FP_EXTEND being legalized: operand 0 is forwarded unchanged
// to any replacement node and operand 1 is the value being extended. OpNo is
// the index of the operand being promoted and must be 1.
//
// Returns the promoted operand itself when its type already equals N's result
// type; otherwise returns a new STRICT_FP_EXTEND of the promoted operand. In
// both cases the users of N's second result (value 1) are redirected here.
SDValue DAGTypeLegalizer::PromoteFloatOp_STRICT_FP_EXTEND(SDNode *N,
                                                          unsigned OpNo) {
  assert(OpNo == 1 && "Promoting unpromotable operand");

  SDValue Op = GetPromotedFloat(N->getOperand(1));
  EVT VT = N->getValueType(0);

  // Desired VT is same as promoted type.  Use promoted float directly.
  if (VT == Op->getValueType(0)) {
    // No new node is created on this path, so anything that consumed N's
    // second result is rewired straight to N's operand 0.
    ReplaceValueWith(SDValue(N, 1), N->getOperand(0));
    return Op;
  }

  // Else, extend the promoted float value to the desired VT.
  SDValue Res = DAG.getNode(ISD::STRICT_FP_EXTEND, SDLoc(N), N->getVTList(),
                            N->getOperand(0), Op);
  // The new node has the same result list as N; hand its second result to the
  // users of N's second result.
  ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
  return Res;
}
2301+
22792302
// Promote the float operands used for comparison. The true- and false-
22802303
// operands have the same type as the result and are promoted, if needed, by
22812304
// PromoteFloatRes_SELECT_CC

llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -713,6 +713,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
713713
SDValue PromoteFloatOp_BITCAST(SDNode *N, unsigned OpNo);
714714
SDValue PromoteFloatOp_FCOPYSIGN(SDNode *N, unsigned OpNo);
715715
SDValue PromoteFloatOp_FP_EXTEND(SDNode *N, unsigned OpNo);
716+
SDValue PromoteFloatOp_STRICT_FP_EXTEND(SDNode *N, unsigned OpNo);
716717
SDValue PromoteFloatOp_UnaryOp(SDNode *N, unsigned OpNo);
717718
SDValue PromoteFloatOp_FP_TO_XINT_SAT(SDNode *N, unsigned OpNo);
718719
SDValue PromoteFloatOp_STORE(SDNode *N, unsigned OpNo);
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
2+
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii < %s | FileCheck -check-prefixes=GFX7 %s
3+
4+
declare float @llvm.experimental.constrained.fpext.f32.f16(half, metadata) #0
5+
declare <2 x float> @llvm.experimental.constrained.fpext.v2f32.v2f16(<2 x half>, metadata) #0
6+
7+
; Scalar case: loads a half from %ptr and extends it to float through the
; constrained fpext intrinsic with "fpexcept.strict". The autogenerated checks
; below expect a 16-bit buffer load followed by a single v_cvt_f32_f16.
define float @v_constrained_fpext_f16_to_f32(ptr addrspace(1) %ptr) #0 {
8+
; GFX7-LABEL: v_constrained_fpext_f16_to_f32:
9+
; GFX7: ; %bb.0:
10+
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11+
; GFX7-NEXT: s_mov_b32 s6, 0
12+
; GFX7-NEXT: s_mov_b32 s7, 0xf000
13+
; GFX7-NEXT: s_mov_b32 s4, s6
14+
; GFX7-NEXT: s_mov_b32 s5, s6
15+
; GFX7-NEXT: buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64
16+
; GFX7-NEXT: s_waitcnt vmcnt(0)
17+
; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
18+
; GFX7-NEXT: s_setpc_b64 s[30:31]
19+
%val = load half, ptr addrspace(1) %ptr
20+
%result = call float @llvm.experimental.constrained.fpext.f32.f16(half %val, metadata !"fpexcept.strict")
21+
ret float %result
22+
}
23+
24+
; Vector case: loads a <2 x half> from %ptr and extends it to <2 x float>
; through the constrained fpext intrinsic with "fpexcept.strict". The
; autogenerated checks below expect one dword load and two v_cvt_f32_f16
; conversions, the second applied to the loaded value shifted right by 16.
define <2 x float> @v_constrained_fpext_v2f16_to_v2f32(ptr addrspace(1) %ptr) #0 {
25+
; GFX7-LABEL: v_constrained_fpext_v2f16_to_v2f32:
26+
; GFX7: ; %bb.0:
27+
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
28+
; GFX7-NEXT: s_mov_b32 s6, 0
29+
; GFX7-NEXT: s_mov_b32 s7, 0xf000
30+
; GFX7-NEXT: s_mov_b32 s4, s6
31+
; GFX7-NEXT: s_mov_b32 s5, s6
32+
; GFX7-NEXT: buffer_load_dword v1, v[0:1], s[4:7], 0 addr64
33+
; GFX7-NEXT: s_waitcnt vmcnt(0)
34+
; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v1
35+
; GFX7-NEXT: v_lshrrev_b32_e32 v1, 16, v1
36+
; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
37+
; GFX7-NEXT: s_setpc_b64 s[30:31]
38+
%val = load <2 x half>, ptr addrspace(1) %ptr
39+
%result = call <2 x float> @llvm.experimental.constrained.fpext.v2f32.v2f16(<2 x half> %val, metadata !"fpexcept.strict")
40+
ret <2 x float> %result
41+
}
42+
43+
attributes #0 = { strictfp }

0 commit comments

Comments
 (0)