Skip to content

Commit f6dacda

Browse files
authored
[RISCV] Fold vfmv.f.s into load from stack (#110129)
This is the f64/f32 version of #109774. I've left out f16 and bf16 for now because there's a separate issue where we can't select extract_vector_elt when f16/bf16 is a legal type; see #110126.
1 parent d1cd2c3 commit f6dacda

File tree

3 files changed

+125
-40
lines changed

3 files changed

+125
-40
lines changed

llvm/lib/Target/RISCV/RISCVInstrInfo.cpp

Lines changed: 18 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -784,6 +784,24 @@ MachineInstr *RISCVInstrInfo::foldMemoryOperandImpl(
784784
}
785785
break;
786786
}
787+
if (RISCV::getRVVMCOpcode(MI.getOpcode()) == RISCV::VFMV_F_S) {
788+
unsigned Log2SEW =
789+
MI.getOperand(RISCVII::getSEWOpNum(MI.getDesc())).getImm();
790+
switch (Log2SEW) {
791+
case 4:
792+
// TODO: Support f16/bf16
793+
return nullptr;
794+
case 5:
795+
LoadOpc = RISCV::FLW;
796+
break;
797+
case 6:
798+
LoadOpc = RISCV::FLD;
799+
break;
800+
default:
801+
llvm_unreachable("Unexpected SEW");
802+
}
803+
break;
804+
}
787805
return nullptr;
788806
case RISCV::SEXT_H:
789807
LoadOpc = RISCV::LH;

llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll

Lines changed: 10 additions & 40 deletions
Original file line number | Diff line number | Diff line change
@@ -2261,10 +2261,7 @@ define <2 x i64> @stest_f64i64(<2 x double> %x) {
22612261
; CHECK-V-NEXT: call __fixdfti
22622262
; CHECK-V-NEXT: mv s0, a0
22632263
; CHECK-V-NEXT: mv s1, a1
2264-
; CHECK-V-NEXT: addi a0, sp, 32
2265-
; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
2266-
; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
2267-
; CHECK-V-NEXT: vfmv.f.s fa0, v8
2264+
; CHECK-V-NEXT: fld fa0, 32(sp) # 8-byte Folded Reload
22682265
; CHECK-V-NEXT: call __fixdfti
22692266
; CHECK-V-NEXT: li a2, -1
22702267
; CHECK-V-NEXT: srli a3, a2, 1
@@ -2394,10 +2391,7 @@ define <2 x i64> @utest_f64i64(<2 x double> %x) {
23942391
; CHECK-V-NEXT: call __fixunsdfti
23952392
; CHECK-V-NEXT: mv s0, a0
23962393
; CHECK-V-NEXT: mv s1, a1
2397-
; CHECK-V-NEXT: addi a0, sp, 32
2398-
; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
2399-
; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
2400-
; CHECK-V-NEXT: vfmv.f.s fa0, v8
2394+
; CHECK-V-NEXT: fld fa0, 32(sp) # 8-byte Folded Reload
24012395
; CHECK-V-NEXT: call __fixunsdfti
24022396
; CHECK-V-NEXT: snez a1, a1
24032397
; CHECK-V-NEXT: snez a2, s1
@@ -2506,10 +2500,7 @@ define <2 x i64> @ustest_f64i64(<2 x double> %x) {
25062500
; CHECK-V-NEXT: call __fixdfti
25072501
; CHECK-V-NEXT: mv s0, a0
25082502
; CHECK-V-NEXT: mv s1, a1
2509-
; CHECK-V-NEXT: addi a0, sp, 32
2510-
; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
2511-
; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
2512-
; CHECK-V-NEXT: vfmv.f.s fa0, v8
2503+
; CHECK-V-NEXT: fld fa0, 32(sp) # 8-byte Folded Reload
25132504
; CHECK-V-NEXT: call __fixdfti
25142505
; CHECK-V-NEXT: mv a2, s1
25152506
; CHECK-V-NEXT: blez s1, .LBB20_2
@@ -2668,10 +2659,7 @@ define <2 x i64> @stest_f32i64(<2 x float> %x) {
26682659
; CHECK-V-NEXT: call __fixsfti
26692660
; CHECK-V-NEXT: mv s0, a0
26702661
; CHECK-V-NEXT: mv s1, a1
2671-
; CHECK-V-NEXT: addi a0, sp, 32
2672-
; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
2673-
; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma
2674-
; CHECK-V-NEXT: vfmv.f.s fa0, v8
2662+
; CHECK-V-NEXT: flw fa0, 32(sp) # 8-byte Folded Reload
26752663
; CHECK-V-NEXT: call __fixsfti
26762664
; CHECK-V-NEXT: li a2, -1
26772665
; CHECK-V-NEXT: srli a3, a2, 1
@@ -2801,10 +2789,7 @@ define <2 x i64> @utest_f32i64(<2 x float> %x) {
28012789
; CHECK-V-NEXT: call __fixunssfti
28022790
; CHECK-V-NEXT: mv s0, a0
28032791
; CHECK-V-NEXT: mv s1, a1
2804-
; CHECK-V-NEXT: addi a0, sp, 32
2805-
; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
2806-
; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma
2807-
; CHECK-V-NEXT: vfmv.f.s fa0, v8
2792+
; CHECK-V-NEXT: flw fa0, 32(sp) # 8-byte Folded Reload
28082793
; CHECK-V-NEXT: call __fixunssfti
28092794
; CHECK-V-NEXT: snez a1, a1
28102795
; CHECK-V-NEXT: snez a2, s1
@@ -2913,10 +2898,7 @@ define <2 x i64> @ustest_f32i64(<2 x float> %x) {
29132898
; CHECK-V-NEXT: call __fixsfti
29142899
; CHECK-V-NEXT: mv s0, a0
29152900
; CHECK-V-NEXT: mv s1, a1
2916-
; CHECK-V-NEXT: addi a0, sp, 32
2917-
; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
2918-
; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma
2919-
; CHECK-V-NEXT: vfmv.f.s fa0, v8
2901+
; CHECK-V-NEXT: flw fa0, 32(sp) # 8-byte Folded Reload
29202902
; CHECK-V-NEXT: call __fixsfti
29212903
; CHECK-V-NEXT: mv a2, s1
29222904
; CHECK-V-NEXT: blez s1, .LBB23_2
@@ -5597,10 +5579,7 @@ define <2 x i64> @stest_f64i64_mm(<2 x double> %x) {
55975579
; CHECK-V-NEXT: call __fixdfti
55985580
; CHECK-V-NEXT: mv s0, a0
55995581
; CHECK-V-NEXT: mv s1, a1
5600-
; CHECK-V-NEXT: addi a0, sp, 32
5601-
; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
5602-
; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
5603-
; CHECK-V-NEXT: vfmv.f.s fa0, v8
5582+
; CHECK-V-NEXT: fld fa0, 32(sp) # 8-byte Folded Reload
56045583
; CHECK-V-NEXT: call __fixdfti
56055584
; CHECK-V-NEXT: li a2, -1
56065585
; CHECK-V-NEXT: srli a3, a2, 1
@@ -5831,10 +5810,7 @@ define <2 x i64> @ustest_f64i64_mm(<2 x double> %x) {
58315810
; CHECK-V-NEXT: call __fixdfti
58325811
; CHECK-V-NEXT: mv s0, a0
58335812
; CHECK-V-NEXT: mv s1, a1
5834-
; CHECK-V-NEXT: addi a0, sp, 32
5835-
; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
5836-
; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
5837-
; CHECK-V-NEXT: vfmv.f.s fa0, v8
5813+
; CHECK-V-NEXT: fld fa0, 32(sp) # 8-byte Folded Reload
58385814
; CHECK-V-NEXT: call __fixdfti
58395815
; CHECK-V-NEXT: mv a2, a1
58405816
; CHECK-V-NEXT: blez a1, .LBB47_2
@@ -5983,10 +5959,7 @@ define <2 x i64> @stest_f32i64_mm(<2 x float> %x) {
59835959
; CHECK-V-NEXT: call __fixsfti
59845960
; CHECK-V-NEXT: mv s0, a0
59855961
; CHECK-V-NEXT: mv s1, a1
5986-
; CHECK-V-NEXT: addi a0, sp, 32
5987-
; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
5988-
; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma
5989-
; CHECK-V-NEXT: vfmv.f.s fa0, v8
5962+
; CHECK-V-NEXT: flw fa0, 32(sp) # 8-byte Folded Reload
59905963
; CHECK-V-NEXT: call __fixsfti
59915964
; CHECK-V-NEXT: li a2, -1
59925965
; CHECK-V-NEXT: srli a3, a2, 1
@@ -6217,10 +6190,7 @@ define <2 x i64> @ustest_f32i64_mm(<2 x float> %x) {
62176190
; CHECK-V-NEXT: call __fixsfti
62186191
; CHECK-V-NEXT: mv s0, a0
62196192
; CHECK-V-NEXT: mv s1, a1
6220-
; CHECK-V-NEXT: addi a0, sp, 32
6221-
; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
6222-
; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma
6223-
; CHECK-V-NEXT: vfmv.f.s fa0, v8
6193+
; CHECK-V-NEXT: flw fa0, 32(sp) # 8-byte Folded Reload
62246194
; CHECK-V-NEXT: call __fixsfti
62256195
; CHECK-V-NEXT: mv a2, a1
62266196
; CHECK-V-NEXT: blez a1, .LBB50_2

llvm/test/CodeGen/RISCV/rvv/stack-folding.ll

Lines changed: 97 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -160,3 +160,100 @@ truebb:
160160
falsebb:
161161
ret i8 0
162162
}
163+
164+
define double @f64(<vscale x 1 x double> %v, i1 %c) {
165+
; RV32-LABEL: f64:
166+
; RV32: # %bb.0:
167+
; RV32-NEXT: addi sp, sp, -16
168+
; RV32-NEXT: .cfi_def_cfa_offset 16
169+
; RV32-NEXT: csrr a1, vlenb
170+
; RV32-NEXT: slli a1, a1, 1
171+
; RV32-NEXT: sub sp, sp, a1
172+
; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
173+
; RV32-NEXT: addi a1, sp, 16
174+
; RV32-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill
175+
; RV32-NEXT: andi a0, a0, 1
176+
; RV32-NEXT: #APP
177+
; RV32-NEXT: #NO_APP
178+
; RV32-NEXT: beqz a0, .LBB4_2
179+
; RV32-NEXT: # %bb.1: # %truebb
180+
; RV32-NEXT: fld fa0, 16(sp) # 8-byte Folded Reload
181+
; RV32-NEXT: j .LBB4_3
182+
; RV32-NEXT: .LBB4_2: # %falsebb
183+
; RV32-NEXT: fcvt.d.w fa0, zero
184+
; RV32-NEXT: .LBB4_3: # %falsebb
185+
; RV32-NEXT: csrr a0, vlenb
186+
; RV32-NEXT: slli a0, a0, 1
187+
; RV32-NEXT: add sp, sp, a0
188+
; RV32-NEXT: addi sp, sp, 16
189+
; RV32-NEXT: ret
190+
;
191+
; RV64-LABEL: f64:
192+
; RV64: # %bb.0:
193+
; RV64-NEXT: addi sp, sp, -16
194+
; RV64-NEXT: .cfi_def_cfa_offset 16
195+
; RV64-NEXT: csrr a1, vlenb
196+
; RV64-NEXT: slli a1, a1, 1
197+
; RV64-NEXT: sub sp, sp, a1
198+
; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
199+
; RV64-NEXT: addi a1, sp, 16
200+
; RV64-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill
201+
; RV64-NEXT: andi a0, a0, 1
202+
; RV64-NEXT: #APP
203+
; RV64-NEXT: #NO_APP
204+
; RV64-NEXT: beqz a0, .LBB4_2
205+
; RV64-NEXT: # %bb.1: # %truebb
206+
; RV64-NEXT: fld fa0, 16(sp) # 8-byte Folded Reload
207+
; RV64-NEXT: j .LBB4_3
208+
; RV64-NEXT: .LBB4_2: # %falsebb
209+
; RV64-NEXT: fmv.d.x fa0, zero
210+
; RV64-NEXT: .LBB4_3: # %falsebb
211+
; RV64-NEXT: csrr a0, vlenb
212+
; RV64-NEXT: slli a0, a0, 1
213+
; RV64-NEXT: add sp, sp, a0
214+
; RV64-NEXT: addi sp, sp, 16
215+
; RV64-NEXT: ret
216+
tail call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"()
217+
br i1 %c, label %truebb, label %falsebb
218+
truebb:
219+
%x = extractelement <vscale x 1 x double> %v, i32 0
220+
ret double %x
221+
falsebb:
222+
ret double 0.0
223+
}
224+
225+
define float @f32(<vscale x 2 x float> %v, i1 %c) {
226+
; CHECK-LABEL: f32:
227+
; CHECK: # %bb.0:
228+
; CHECK-NEXT: addi sp, sp, -16
229+
; CHECK-NEXT: .cfi_def_cfa_offset 16
230+
; CHECK-NEXT: csrr a1, vlenb
231+
; CHECK-NEXT: slli a1, a1, 1
232+
; CHECK-NEXT: sub sp, sp, a1
233+
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
234+
; CHECK-NEXT: addi a1, sp, 16
235+
; CHECK-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill
236+
; CHECK-NEXT: andi a0, a0, 1
237+
; CHECK-NEXT: #APP
238+
; CHECK-NEXT: #NO_APP
239+
; CHECK-NEXT: beqz a0, .LBB5_2
240+
; CHECK-NEXT: # %bb.1: # %truebb
241+
; CHECK-NEXT: flw fa0, 16(sp) # 8-byte Folded Reload
242+
; CHECK-NEXT: j .LBB5_3
243+
; CHECK-NEXT: .LBB5_2: # %falsebb
244+
; CHECK-NEXT: fmv.w.x fa0, zero
245+
; CHECK-NEXT: .LBB5_3: # %falsebb
246+
; CHECK-NEXT: csrr a0, vlenb
247+
; CHECK-NEXT: slli a0, a0, 1
248+
; CHECK-NEXT: add sp, sp, a0
249+
; CHECK-NEXT: addi sp, sp, 16
250+
; CHECK-NEXT: ret
251+
tail call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"()
252+
br i1 %c, label %truebb, label %falsebb
253+
truebb:
254+
%x = extractelement <vscale x 2 x float> %v, i32 0
255+
ret float %x
256+
falsebb:
257+
ret float 0.0
258+
}
259+

0 commit comments

Comments
 (0)