Skip to content

Commit 2af57b6

Browse files
committed
[PowerPC] Add prefix load pattern for fpext to v2f64
This patch adds a prefixed load pattern for the fpext from v2f32 to v2f64, covering the case where the value is loaded from an address whose offset fits into a 34-bit signed immediate. A reduced test case is also added to the patch; the new pattern is exercised by the big-endian CHECK lines of the newly added test. Differential Revision: https://reviews.llvm.org/D109887
1 parent ab6a69d commit 2af57b6

File tree

2 files changed

+58
-0
lines changed

2 files changed

+58
-0
lines changed

llvm/lib/Target/PowerPC/PPCInstrPrefix.td

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2790,6 +2790,10 @@ let Predicates = [PrefixInstrs] in {
27902790
def : Pat<(atomic_store_16 PDForm:$dst, i32:$RS), (PSTH $RS, memri34:$dst)>;
27912791
def : Pat<(atomic_store_32 PDForm:$dst, i32:$RS), (PSTW $RS, memri34:$dst)>;
27922792
def : Pat<(atomic_store_64 PDForm:$dst, i64:$RS), (PSTD $RS, memri34:$dst)>;
2793+
2794+
// Prefixed fpext to v2f64: when a PPCldvsxlh load has an address that matches
// PDForm, select the prefixed PLFD instead. SUBREG_TO_REG places the PLFD
// result in the sub_64 subregister of the v4f32 value the pattern produces.
2795+
def : Pat<(v4f32 (PPCldvsxlh PDForm:$src)),
2796+
(SUBREG_TO_REG (i64 1), (PLFD PDForm:$src), sub_64)>;
27932797
}
27942798

27952799
def InsertEltShift {

llvm/test/CodeGen/PowerPC/reduce_scalarization.ll

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,3 +108,57 @@ entry:
108108
%add = fadd <2 x double> %1, %a
109109
ret <2 x double> %add
110110
}
111+
112+
; Packed struct whose last field (index 20) is a [89856 x i8] byte array.
; @Glob1 is an external array of 25 of these; test6 loads from inside the
; byte array of element 6.
%0 = type <{ i32, i8, [1 x i8], i16, i32, i32, i8, [1 x i8], i16, i32, float, float, double, double, ppc_fp128, { float, float }, { float, float }, { double, double }, { double, double }, { ppc_fp128, ppc_fp128 }, [89856 x i8] }>
113+
@Glob1 = external dso_local unnamed_addr global [25 x %0], align 16
114+
115+
; test6: loads a <2 x float> from a constant address inside @Glob1 (offset
; 562536 in the CHECK lines below), extends it to <2 x double> with fpext,
; compares it against zero and branches on the combined lanes. Both branch
; targets are unreachable. In both CHECK prefixes the load is selected as a
; single plfd carrying the large offset.
define dso_local i32 @test6() #0 {
116+
; CHECK-P10-LABEL: test6:
117+
; CHECK-P10: # %bb.0: # %bb
118+
; CHECK-P10-NEXT: plfd f0, Glob1@PCREL+562536(0), 1
119+
; CHECK-P10-NEXT: xxlxor vs1, vs1, vs1
120+
; CHECK-P10-NEXT: xxmrghw vs0, vs0, vs0
121+
; CHECK-P10-NEXT: xvcvspdp vs0, vs0
122+
; CHECK-P10-NEXT: xvcmpeqdp v2, vs1, vs0
123+
; CHECK-P10-NEXT: xxswapd v3, v2
124+
; CHECK-P10-NEXT: xxland vs0, v2, v3
125+
; CHECK-P10-NEXT: mfvsrld r3, vs0
126+
; CHECK-P10-NEXT: andi. r3, r3, 1
127+
; CHECK-P10-NEXT: bc 4, gt, .LBB5_2
128+
; CHECK-P10-NEXT: # %bb.1: # %bb8
129+
; CHECK-P10-NEXT: .LBB5_2: # %bb7
130+
;
131+
; CHECK-P10-BE-LABEL: test6:
132+
; CHECK-P10-BE: # %bb.0: # %bb
133+
; CHECK-P10-BE-NEXT: addis r3, r2, Glob1@toc@ha
134+
; CHECK-P10-BE-NEXT: xxlxor vs1, vs1, vs1
135+
; CHECK-P10-BE-NEXT: addi r3, r3, Glob1@toc@l
136+
; CHECK-P10-BE-NEXT: plfd f0, 562536(r3), 0
137+
; CHECK-P10-BE-NEXT: xxmrghw vs0, vs0, vs0
138+
; CHECK-P10-BE-NEXT: xvcvspdp vs0, vs0
139+
; CHECK-P10-BE-NEXT: xvcmpeqdp v2, vs1, vs0
140+
; CHECK-P10-BE-NEXT: xxswapd v3, v2
141+
; CHECK-P10-BE-NEXT: xxland vs0, v2, v3
142+
; CHECK-P10-BE-NEXT: mffprd r3, f0
143+
; CHECK-P10-BE-NEXT: andi. r3, r3, 1
144+
; CHECK-P10-BE-NEXT: bc 4, gt, .LBB5_2
145+
; CHECK-P10-BE-NEXT: # %bb.1: # %bb8
146+
; CHECK-P10-BE-NEXT: .LBB5_2: # %bb7
147+
bb:
148+
br label %bb1
149+
150+
bb1: ; preds = %bb
151+
%i = load <2 x float>, <2 x float>* bitcast (i8* getelementptr inbounds ([25 x %0], [25 x %0]* @Glob1, i64 0, i64 6, i32 20, i64 22392) to <2 x float>*), align 8
152+
%i2 = fpext <2 x float> %i to <2 x double>
153+
%i3 = fcmp contract oeq <2 x double> zeroinitializer, %i2
154+
%i4 = shufflevector <2 x i1> %i3, <2 x i1> poison, <2 x i32> <i32 1, i32 undef>
155+
%i5 = and <2 x i1> %i3, %i4
156+
%i6 = extractelement <2 x i1> %i5, i32 0
157+
br i1 %i6, label %bb8, label %bb7
158+
159+
bb7: ; preds = %bb1
160+
unreachable
161+
162+
bb8: ; preds = %bb1
163+
unreachable
164+
}

0 commit comments

Comments
 (0)