-
Notifications
You must be signed in to change notification settings - Fork 14.3k
release/20.x: [PowerPC] Support conversion between f16 and f128 (#130158) #132049
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
Enables conversion between f16 and f128. Expanding on pre-Power9 targets and using HW instructions on Power9. Fixes llvm#92866 Commandeer of: llvm#97677 --------- Co-authored-by: esmeyi <[email protected]> (cherry picked from commit ade22fc)
@tgross35 What do you think about merging this PR to the release branch? |
@llvm/pr-subscribers-llvm-ir @llvm/pr-subscribers-backend-powerpc Author: None (llvmbot) ChangesBackport ade22fc Requested by: @lei137 Patch is 59.33 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/132049.diff 5 Files Affected:
diff --git a/llvm/lib/IR/RuntimeLibcalls.cpp b/llvm/lib/IR/RuntimeLibcalls.cpp
index e38fce764b640..085a3bc0586b6 100644
--- a/llvm/lib/IR/RuntimeLibcalls.cpp
+++ b/llvm/lib/IR/RuntimeLibcalls.cpp
@@ -82,6 +82,7 @@ void RuntimeLibcallsInfo::initLibcalls(const Triple &TT) {
setLibcallName(RTLIB::POWI_F128, "__powikf2");
setLibcallName(RTLIB::FPEXT_F32_F128, "__extendsfkf2");
setLibcallName(RTLIB::FPEXT_F64_F128, "__extenddfkf2");
+ setLibcallName(RTLIB::FPROUND_F128_F16, "__trunckfhf2");
setLibcallName(RTLIB::FPROUND_F128_F32, "__trunckfsf2");
setLibcallName(RTLIB::FPROUND_F128_F64, "__trunckfdf2");
setLibcallName(RTLIB::FPTOSINT_F128_I32, "__fixkfsi");
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 21ff6f050817a..16491a145a5b9 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -223,13 +223,19 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Expand);
}
+ setTruncStoreAction(MVT::f128, MVT::f16, Expand);
+ setOperationAction(ISD::FP_TO_FP16, MVT::f128, Expand);
+
if (Subtarget.isISA3_0()) {
+ setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f16, Legal);
setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Legal);
setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Legal);
setTruncStoreAction(MVT::f64, MVT::f16, Legal);
setTruncStoreAction(MVT::f32, MVT::f16, Legal);
} else {
// No extending loads from f16 or HW conversions back and forth.
+ setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f16, Expand);
+ setOperationAction(ISD::FP16_TO_FP, MVT::f128, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
index 8e400bc63b785..a8724ea125140 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
@@ -3997,6 +3997,8 @@ defm : ScalToVecWPermute<
(SUBREG_TO_REG (i64 1), (VEXTSH2Ds (LXSIHZX ForceXForm:$src)), sub_64)>;
// Load/convert and convert/store patterns for f16.
+def : Pat<(f128 (extloadf16 ForceXForm:$src)),
+ (f128 (XSCVDPQP (XSCVHPDP (LXSIHZX ForceXForm:$src))))>;
def : Pat<(f64 (extloadf16 ForceXForm:$src)),
(f64 (XSCVHPDP (LXSIHZX ForceXForm:$src)))>;
def : Pat<(truncstoref16 f64:$src, ForceXForm:$dst),
@@ -4005,6 +4007,8 @@ def : Pat<(f32 (extloadf16 ForceXForm:$src)),
(f32 (COPY_TO_REGCLASS (XSCVHPDP (LXSIHZX ForceXForm:$src)), VSSRC))>;
def : Pat<(truncstoref16 f32:$src, ForceXForm:$dst),
(STXSIHX (XSCVDPHP (COPY_TO_REGCLASS $src, VSFRC)), ForceXForm:$dst)>;
+def : Pat<(f128 (f16_to_fp i32:$A)),
+ (f128 (XSCVDPQP (XSCVHPDP (MTVSRWZ $A))))>;
def : Pat<(f64 (f16_to_fp i32:$A)),
(f64 (XSCVHPDP (MTVSRWZ $A)))>;
def : Pat<(f32 (f16_to_fp i32:$A)),
diff --git a/llvm/test/CodeGen/PowerPC/f128-conv.ll b/llvm/test/CodeGen/PowerPC/f128-conv.ll
index d8eed1fb4092c..f8b2861156db4 100644
--- a/llvm/test/CodeGen/PowerPC/f128-conv.ll
+++ b/llvm/test/CodeGen/PowerPC/f128-conv.ll
@@ -10,11 +10,11 @@
@umem = global [5 x i64] [i64 560, i64 100, i64 34, i64 2, i64 5], align 8
@swMem = global [5 x i32] [i32 5, i32 2, i32 3, i32 4, i32 0], align 4
@uwMem = global [5 x i32] [i32 5, i32 2, i32 3, i32 4, i32 0], align 4
-@uhwMem = local_unnamed_addr global [5 x i16] [i16 5, i16 2, i16 3, i16 4, i16 0], align 2
-@ubMem = local_unnamed_addr global [5 x i8] c"\05\02\03\04\00", align 1
+@uhwMem = global [5 x i16] [i16 5, i16 2, i16 3, i16 4, i16 0], align 2
+@ubMem = global [5 x i8] c"\05\02\03\04\00", align 1
; Function Attrs: norecurse nounwind
-define void @sdwConv2qp(ptr nocapture %a, i64 %b) {
+define void @sdwConv2qp(ptr nocapture %a, i64 %b) nounwind {
; CHECK-LABEL: sdwConv2qp:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: mtvsrd v2, r4
@@ -25,9 +25,6 @@ define void @sdwConv2qp(ptr nocapture %a, i64 %b) {
; CHECK-P8-LABEL: sdwConv2qp:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: mflr r0
-; CHECK-P8-NEXT: .cfi_def_cfa_offset 48
-; CHECK-P8-NEXT: .cfi_offset lr, 16
-; CHECK-P8-NEXT: .cfi_offset r30, -16
; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT: stdu r1, -48(r1)
; CHECK-P8-NEXT: mr r30, r3
@@ -50,13 +47,10 @@ entry:
}
; Function Attrs: norecurse nounwind
-define void @sdwConv2qp_01(ptr nocapture %a, i128 %b) {
+define void @sdwConv2qp_01(ptr nocapture %a, i128 %b) nounwind {
; CHECK-LABEL: sdwConv2qp_01:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: mflr r0
-; CHECK-NEXT: .cfi_def_cfa_offset 48
-; CHECK-NEXT: .cfi_offset lr, 16
-; CHECK-NEXT: .cfi_offset r30, -16
; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill
; CHECK-NEXT: stdu r1, -48(r1)
; CHECK-NEXT: mr r30, r3
@@ -75,9 +69,6 @@ define void @sdwConv2qp_01(ptr nocapture %a, i128 %b) {
; CHECK-P8-LABEL: sdwConv2qp_01:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: mflr r0
-; CHECK-P8-NEXT: .cfi_def_cfa_offset 48
-; CHECK-P8-NEXT: .cfi_offset lr, 16
-; CHECK-P8-NEXT: .cfi_offset r30, -16
; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT: stdu r1, -48(r1)
; CHECK-P8-NEXT: mr r30, r3
@@ -101,7 +92,7 @@ entry:
}
; Function Attrs: norecurse nounwind
-define void @sdwConv2qp_02(ptr nocapture %a) {
+define void @sdwConv2qp_02(ptr nocapture %a) nounwind {
; CHECK-LABEL: sdwConv2qp_02:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: addis r4, r2, .LC0@toc@ha
@@ -114,9 +105,6 @@ define void @sdwConv2qp_02(ptr nocapture %a) {
; CHECK-P8-LABEL: sdwConv2qp_02:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: mflr r0
-; CHECK-P8-NEXT: .cfi_def_cfa_offset 48
-; CHECK-P8-NEXT: .cfi_offset lr, 16
-; CHECK-P8-NEXT: .cfi_offset r30, -16
; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT: stdu r1, -48(r1)
; CHECK-P8-NEXT: mr r30, r3
@@ -134,16 +122,16 @@ define void @sdwConv2qp_02(ptr nocapture %a) {
; CHECK-P8-NEXT: mtlr r0
; CHECK-P8-NEXT: blr
entry:
- %0 = load i64, ptr getelementptr inbounds
+ %i = load i64, ptr getelementptr inbounds
([5 x i64], ptr @mem, i64 0, i64 2), align 8
- %conv = sitofp i64 %0 to fp128
+ %conv = sitofp i64 %i to fp128
store fp128 %conv, ptr %a, align 16
ret void
}
; Function Attrs: norecurse nounwind
-define void @sdwConv2qp_03(ptr nocapture %a, ptr nocapture readonly %b) {
+define void @sdwConv2qp_03(ptr nocapture %a, ptr nocapture readonly %b) nounwind {
; CHECK-LABEL: sdwConv2qp_03:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lxsd v2, 0(r4)
@@ -154,9 +142,6 @@ define void @sdwConv2qp_03(ptr nocapture %a, ptr nocapture readonly %b) {
; CHECK-P8-LABEL: sdwConv2qp_03:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: mflr r0
-; CHECK-P8-NEXT: .cfi_def_cfa_offset 48
-; CHECK-P8-NEXT: .cfi_offset lr, 16
-; CHECK-P8-NEXT: .cfi_offset r30, -16
; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT: stdu r1, -48(r1)
; CHECK-P8-NEXT: std r0, 64(r1)
@@ -172,15 +157,15 @@ define void @sdwConv2qp_03(ptr nocapture %a, ptr nocapture readonly %b) {
; CHECK-P8-NEXT: mtlr r0
; CHECK-P8-NEXT: blr
entry:
- %0 = load i64, ptr %b, align 8
- %conv = sitofp i64 %0 to fp128
+ %i = load i64, ptr %b, align 8
+ %conv = sitofp i64 %i to fp128
store fp128 %conv, ptr %a, align 16
ret void
}
; Function Attrs: norecurse nounwind
-define void @sdwConv2qp_04(ptr nocapture %a, i1 %b) {
+define void @sdwConv2qp_04(ptr nocapture %a, i1 %b) nounwind {
; CHECK-LABEL: sdwConv2qp_04:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: andi. r4, r4, 1
@@ -195,9 +180,6 @@ define void @sdwConv2qp_04(ptr nocapture %a, i1 %b) {
; CHECK-P8-LABEL: sdwConv2qp_04:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: mflr r0
-; CHECK-P8-NEXT: .cfi_def_cfa_offset 48
-; CHECK-P8-NEXT: .cfi_offset lr, 16
-; CHECK-P8-NEXT: .cfi_offset r30, -16
; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT: stdu r1, -48(r1)
; CHECK-P8-NEXT: mr r30, r3
@@ -223,7 +205,7 @@ entry:
}
; Function Attrs: norecurse nounwind
-define void @udwConv2qp(ptr nocapture %a, i64 %b) {
+define void @udwConv2qp(ptr nocapture %a, i64 %b) nounwind {
; CHECK-LABEL: udwConv2qp:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: mtvsrd v2, r4
@@ -234,9 +216,6 @@ define void @udwConv2qp(ptr nocapture %a, i64 %b) {
; CHECK-P8-LABEL: udwConv2qp:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: mflr r0
-; CHECK-P8-NEXT: .cfi_def_cfa_offset 48
-; CHECK-P8-NEXT: .cfi_offset lr, 16
-; CHECK-P8-NEXT: .cfi_offset r30, -16
; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT: stdu r1, -48(r1)
; CHECK-P8-NEXT: mr r30, r3
@@ -259,13 +238,10 @@ entry:
}
; Function Attrs: norecurse nounwind
-define void @udwConv2qp_01(ptr nocapture %a, i128 %b) {
+define void @udwConv2qp_01(ptr nocapture %a, i128 %b) nounwind {
; CHECK-LABEL: udwConv2qp_01:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: mflr r0
-; CHECK-NEXT: .cfi_def_cfa_offset 48
-; CHECK-NEXT: .cfi_offset lr, 16
-; CHECK-NEXT: .cfi_offset r30, -16
; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill
; CHECK-NEXT: stdu r1, -48(r1)
; CHECK-NEXT: mr r30, r3
@@ -284,9 +260,6 @@ define void @udwConv2qp_01(ptr nocapture %a, i128 %b) {
; CHECK-P8-LABEL: udwConv2qp_01:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: mflr r0
-; CHECK-P8-NEXT: .cfi_def_cfa_offset 48
-; CHECK-P8-NEXT: .cfi_offset lr, 16
-; CHECK-P8-NEXT: .cfi_offset r30, -16
; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT: stdu r1, -48(r1)
; CHECK-P8-NEXT: mr r30, r3
@@ -310,7 +283,7 @@ entry:
}
; Function Attrs: norecurse nounwind
-define void @udwConv2qp_02(ptr nocapture %a) {
+define void @udwConv2qp_02(ptr nocapture %a) nounwind {
; CHECK-LABEL: udwConv2qp_02:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: addis r4, r2, .LC1@toc@ha
@@ -323,9 +296,6 @@ define void @udwConv2qp_02(ptr nocapture %a) {
; CHECK-P8-LABEL: udwConv2qp_02:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: mflr r0
-; CHECK-P8-NEXT: .cfi_def_cfa_offset 48
-; CHECK-P8-NEXT: .cfi_offset lr, 16
-; CHECK-P8-NEXT: .cfi_offset r30, -16
; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT: stdu r1, -48(r1)
; CHECK-P8-NEXT: mr r30, r3
@@ -343,16 +313,16 @@ define void @udwConv2qp_02(ptr nocapture %a) {
; CHECK-P8-NEXT: mtlr r0
; CHECK-P8-NEXT: blr
entry:
- %0 = load i64, ptr getelementptr inbounds
+ %i = load i64, ptr getelementptr inbounds
([5 x i64], ptr @umem, i64 0, i64 4), align 8
- %conv = uitofp i64 %0 to fp128
+ %conv = uitofp i64 %i to fp128
store fp128 %conv, ptr %a, align 16
ret void
}
; Function Attrs: norecurse nounwind
-define void @udwConv2qp_03(ptr nocapture %a, ptr nocapture readonly %b) {
+define void @udwConv2qp_03(ptr nocapture %a, ptr nocapture readonly %b) nounwind {
; CHECK-LABEL: udwConv2qp_03:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lxsd v2, 0(r4)
@@ -363,9 +333,6 @@ define void @udwConv2qp_03(ptr nocapture %a, ptr nocapture readonly %b) {
; CHECK-P8-LABEL: udwConv2qp_03:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: mflr r0
-; CHECK-P8-NEXT: .cfi_def_cfa_offset 48
-; CHECK-P8-NEXT: .cfi_offset lr, 16
-; CHECK-P8-NEXT: .cfi_offset r30, -16
; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT: stdu r1, -48(r1)
; CHECK-P8-NEXT: std r0, 64(r1)
@@ -381,15 +348,15 @@ define void @udwConv2qp_03(ptr nocapture %a, ptr nocapture readonly %b) {
; CHECK-P8-NEXT: mtlr r0
; CHECK-P8-NEXT: blr
entry:
- %0 = load i64, ptr %b, align 8
- %conv = uitofp i64 %0 to fp128
+ %i = load i64, ptr %b, align 8
+ %conv = uitofp i64 %i to fp128
store fp128 %conv, ptr %a, align 16
ret void
}
; Function Attrs: norecurse nounwind
-define void @udwConv2qp_04(ptr nocapture %a, i1 %b) {
+define void @udwConv2qp_04(ptr nocapture %a, i1 %b) nounwind {
; CHECK-LABEL: udwConv2qp_04:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: clrlwi r4, r4, 31
@@ -401,9 +368,6 @@ define void @udwConv2qp_04(ptr nocapture %a, i1 %b) {
; CHECK-P8-LABEL: udwConv2qp_04:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: mflr r0
-; CHECK-P8-NEXT: .cfi_def_cfa_offset 48
-; CHECK-P8-NEXT: .cfi_offset lr, 16
-; CHECK-P8-NEXT: .cfi_offset r30, -16
; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT: stdu r1, -48(r1)
; CHECK-P8-NEXT: mr r30, r3
@@ -439,9 +403,6 @@ define ptr @sdwConv2qp_testXForm(ptr returned %sink,
; CHECK-P8-LABEL: sdwConv2qp_testXForm:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: mflr r0
-; CHECK-P8-NEXT: .cfi_def_cfa_offset 48
-; CHECK-P8-NEXT: .cfi_offset lr, 16
-; CHECK-P8-NEXT: .cfi_offset r30, -16
; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT: stdu r1, -48(r1)
; CHECK-P8-NEXT: mr r30, r3
@@ -459,11 +420,11 @@ define ptr @sdwConv2qp_testXForm(ptr returned %sink,
; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT: mtlr r0
; CHECK-P8-NEXT: blr
- ptr nocapture readonly %a) {
+ ptr nocapture readonly %a) nounwind {
entry:
%add.ptr = getelementptr inbounds i8, ptr %a, i64 73333
- %0 = load i64, ptr %add.ptr, align 8
- %conv = sitofp i64 %0 to fp128
+ %i = load i64, ptr %add.ptr, align 8
+ %conv = sitofp i64 %i to fp128
store fp128 %conv, ptr %sink, align 16
ret ptr %sink
@@ -483,9 +444,6 @@ define ptr @udwConv2qp_testXForm(ptr returned %sink,
; CHECK-P8-LABEL: udwConv2qp_testXForm:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: mflr r0
-; CHECK-P8-NEXT: .cfi_def_cfa_offset 48
-; CHECK-P8-NEXT: .cfi_offset lr, 16
-; CHECK-P8-NEXT: .cfi_offset r30, -16
; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT: stdu r1, -48(r1)
; CHECK-P8-NEXT: mr r30, r3
@@ -503,18 +461,18 @@ define ptr @udwConv2qp_testXForm(ptr returned %sink,
; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT: mtlr r0
; CHECK-P8-NEXT: blr
- ptr nocapture readonly %a) {
+ ptr nocapture readonly %a) nounwind {
entry:
%add.ptr = getelementptr inbounds i8, ptr %a, i64 73333
- %0 = load i64, ptr %add.ptr, align 8
- %conv = uitofp i64 %0 to fp128
+ %i = load i64, ptr %add.ptr, align 8
+ %conv = uitofp i64 %i to fp128
store fp128 %conv, ptr %sink, align 16
ret ptr %sink
}
; Function Attrs: norecurse nounwind
-define void @swConv2qp(ptr nocapture %a, i32 signext %b) {
+define void @swConv2qp(ptr nocapture %a, i32 signext %b) nounwind {
; CHECK-LABEL: swConv2qp:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: mtvsrwa v2, r4
@@ -525,9 +483,6 @@ define void @swConv2qp(ptr nocapture %a, i32 signext %b) {
; CHECK-P8-LABEL: swConv2qp:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: mflr r0
-; CHECK-P8-NEXT: .cfi_def_cfa_offset 48
-; CHECK-P8-NEXT: .cfi_offset lr, 16
-; CHECK-P8-NEXT: .cfi_offset r30, -16
; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT: stdu r1, -48(r1)
; CHECK-P8-NEXT: mr r30, r3
@@ -550,7 +505,7 @@ entry:
}
; Function Attrs: norecurse nounwind
-define void @swConv2qp_02(ptr nocapture %a, ptr nocapture readonly %b) {
+define void @swConv2qp_02(ptr nocapture %a, ptr nocapture readonly %b) nounwind {
; CHECK-LABEL: swConv2qp_02:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lxsiwax v2, 0, r4
@@ -561,9 +516,6 @@ define void @swConv2qp_02(ptr nocapture %a, ptr nocapture readonly %b) {
; CHECK-P8-LABEL: swConv2qp_02:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: mflr r0
-; CHECK-P8-NEXT: .cfi_def_cfa_offset 48
-; CHECK-P8-NEXT: .cfi_offset lr, 16
-; CHECK-P8-NEXT: .cfi_offset r30, -16
; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT: stdu r1, -48(r1)
; CHECK-P8-NEXT: std r0, 64(r1)
@@ -579,15 +531,15 @@ define void @swConv2qp_02(ptr nocapture %a, ptr nocapture readonly %b) {
; CHECK-P8-NEXT: mtlr r0
; CHECK-P8-NEXT: blr
entry:
- %0 = load i32, ptr %b, align 4
- %conv = sitofp i32 %0 to fp128
+ %i = load i32, ptr %b, align 4
+ %conv = sitofp i32 %i to fp128
store fp128 %conv, ptr %a, align 16
ret void
}
; Function Attrs: norecurse nounwind
-define void @swConv2qp_03(ptr nocapture %a) {
+define void @swConv2qp_03(ptr nocapture %a) nounwind {
; CHECK-LABEL: swConv2qp_03:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: addis r4, r2, .LC2@toc@ha
@@ -601,9 +553,6 @@ define void @swConv2qp_03(ptr nocapture %a) {
; CHECK-P8-LABEL: swConv2qp_03:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: mflr r0
-; CHECK-P8-NEXT: .cfi_def_cfa_offset 48
-; CHECK-P8-NEXT: .cfi_offset lr, 16
-; CHECK-P8-NEXT: .cfi_offset r30, -16
; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT: stdu r1, -48(r1)
; CHECK-P8-NEXT: mr r30, r3
@@ -621,16 +570,16 @@ define void @swConv2qp_03(ptr nocapture %a) {
; CHECK-P8-NEXT: mtlr r0
; CHECK-P8-NEXT: blr
entry:
- %0 = load i32, ptr getelementptr inbounds
+ %i = load i32, ptr getelementptr inbounds
([5 x i32], ptr @swMem, i64 0, i64 3), align 4
- %conv = sitofp i32 %0 to fp128
+ %conv = sitofp i32 %i to fp128
store fp128 %conv, ptr %a, align 16
ret void
}
; Function Attrs: norecurse nounwind
-define void @uwConv2qp(ptr nocapture %a, i32 zeroext %b) {
+define void @uwConv2qp(ptr nocapture %a, i32 zeroext %b) nounwind {
; CHECK-LABEL: uwConv2qp:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: mtvsrwz v2, r4
@@ -641,9 +590,6 @@ define void @uwConv2qp(ptr nocapture %a, i32 zeroext %b) {
; CHECK-P8-LABEL: uwConv2qp:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: mflr r0
-; CHECK-P8-NEXT: .cfi_def_cfa_offset 48
-; CHECK-P8-NEXT: .cfi_offset lr, 16
-; CHECK-P8-NEXT: .cfi_offset r30, -16
; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT: stdu r1, -48(r1)
; CHECK-P8-NEXT: mr r30, r3
@@ -666,7 +612,7 @@ entry:
}
; Function Attrs: norecurse nounwind
-define void @uwConv2qp_02(ptr nocapture %a, ptr nocapture readonly %b) {
+define void @uwConv2qp_02(ptr nocapture %a, ptr nocapture readonly %b) nounwind {
; CHECK-LABEL: uwConv2qp_02:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lxsiwzx v2, 0, r4
@@ -677,9 +623,6 @@ define void @uwConv2qp_02(ptr nocapture %a, ptr nocapture readonly %b) {
; CHECK-P8-LABEL: uwConv2qp_02:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: mflr r0
-; CHECK-P8-NEXT: .cfi_def_cfa_offset 48
-; CHECK-P8-NEXT: .cfi_offset lr, 16
-; CHECK-P8-NEXT: .cfi_offset r30, -16
; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT: stdu r1, -48(r1)
; CHECK-P8-NEXT: std r0, 64(r1)
@@ -695,15 +638,15 @@ define void @uwConv2qp_02(ptr nocapture %a, ptr nocapture readonly %b) {
; CHECK-P8-NEXT: mtlr r0
; CHECK-P8-NEXT: blr
entry:
- %0 = load i32, ptr %b, align 4
- %conv = uitofp i32 %0 to fp128
+ %i = load i32, ptr %b, align 4
+ %conv = uitofp i32 %i to fp128
store fp128 %conv, ptr %a, align 16
ret void
}
; Function Attrs: norecurse nounwind
-define void @uwConv2qp_03(ptr nocapture %a) {
+define void @uwConv2qp_03(ptr nocapture %a) nounwind {
; CHECK-LABEL: uwConv2qp_03:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: addis r4, r2, .LC3@toc@ha
@@ -717,9 +660,6 @@ define void @uwConv2qp_03(ptr nocapture %a) {
; CHECK-P8-LABEL: uwConv2qp_03:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: mflr r0
-; CHECK-P8-NEXT: .cfi_def_cfa_offset 48
-; CHECK-P8-NEXT: .cfi_offset lr, 16
-; CHECK-P8-NEXT: .cfi_offset r30, -16
; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT: stdu r1, -48(r1)
; CHECK-P8-NEXT: mr r30, r3
@@ -737,9 +677,9 @@ define ...
[truncated]
|
I'd be happy to have it but it's certainly not my call. @RolandF77 was the original reviewer. |
Seems like the tests are failing because #126880 hasn't been backported. Probably should just adjust the tests accordingly. What's standard practice here? Should someone with commit access just push a fix to the PR branch? |
I will create a local branch with the fix and open a new PR to get this backported. |
New PR to backport to V20.x: #133279 |
Backport ade22fc
Requested by: @lei137