
Commit b5657d6

[RISCV] Reverse default assumption about performance of vlseN.v vd, (rs1), x0 (#98205)
Some cores implement an optimization for a strided load with an x0 stride, which results in fewer memory operations being performed than implied by VL, since all the addresses are the same. This appears to be true for only a minority of available implementations: we know that sifive-x280 does it, but sifive-p670 and spacemit-x60 both do not. (To be more precise, measurements on the x60 appear to indicate that a stride of x0 has similar latency to a non-zero stride, and that both are about twice the latency of a vleN.v. I'm taking this to mean the x0 case is not optimized.) We already had a flag by which a processor could opt out of this assumption, but it had no upstream users. Instead of adding that flag to the p670 and x60, this patch reverses the default and adds the opt-in flag only to the x280.
1 parent f8dbe1d commit b5657d6

21 files changed: +896 -694 lines
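
For context, the pattern this tune flag governs is the splat of a scalar load: when the flag is set, the backend may emit a single vlseN.v with an x0 stride instead of a scalar load plus a vector splat. The reduced .ll sketch below is illustrative only and is not part of this commit; the function name and RUN lines are hypothetical, and they assume the +v extension together with the +optimized-zero-stride-load attribute spelling introduced by this patch.

; Illustrative reduced test (not part of this commit). With the reversed
; default, only the second RUN line should still favor the zero-stride form
; "vlse64.v v8, (a0), zero"; the first should fall back to a scalar load
; plus a vector splat.
; RUN: llc -mtriple=riscv64 -mattr=+v < %s
; RUN: llc -mtriple=riscv64 -mattr=+v,+optimized-zero-stride-load < %s

define <4 x double> @splat_of_load(ptr %p) {
  %x = load double, ptr %p
  %head = insertelement <4 x double> poison, double %x, i64 0
  %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer
  ret <4 x double> %splat
}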

llvm/lib/Target/RISCV/RISCVFeatures.td
Lines changed: 3 additions & 3 deletions

@@ -1264,9 +1264,9 @@ def FeaturePredictableSelectIsExpensive
     : SubtargetFeature<"predictable-select-expensive", "PredictableSelectIsExpensive", "true",
                        "Prefer likely predicted branches over selects">;
 
-def TuneNoOptimizedZeroStrideLoad
-    : SubtargetFeature<"no-optimized-zero-stride-load", "HasOptimizedZeroStrideLoad",
-                       "false", "Hasn't optimized (perform fewer memory operations)"
+def TuneOptimizedZeroStrideLoad
+    : SubtargetFeature<"optimized-zero-stride-load", "HasOptimizedZeroStrideLoad",
+                       "true", "Optimized (perform fewer memory operations)"
                        "zero-stride vector load">;
 
 def Experimental

llvm/lib/Target/RISCV/RISCVProcessors.td
Lines changed: 2 additions & 1 deletion

@@ -231,7 +231,8 @@ def SIFIVE_X280 : RISCVProcessorModel<"sifive-x280", SiFive7Model,
                                        FeatureStdExtZbb],
                                       [TuneSiFive7,
                                        FeaturePostRAScheduler,
-                                       TuneDLenFactor2]>;
+                                       TuneDLenFactor2,
+                                       TuneOptimizedZeroStrideLoad]>;
 
 def SIFIVE_P450 : RISCVProcessorModel<"sifive-p450", SiFiveP400Model,
                                       [Feature64Bit,
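
With this change, sifive-x280 is the only in-tree processor that carries TuneOptimizedZeroStrideLoad. A quick way to see the per-CPU effect is to run the same splat-of-load pattern through llc with different -mcpu values; the sketch below is illustrative only (not part of this commit) and assumes these -mcpu names are accepted by the build in use.

; Illustrative only: the x280 run should still be willing to use the
; zero-stride vlse form, while the p670 run should not.
; RUN: llc -mtriple=riscv64 -mcpu=sifive-x280 < %s
; RUN: llc -mtriple=riscv64 -mcpu=sifive-p670 < %s

define <2 x double> @splat_of_load(ptr %p) {
  %x = load double, ptr %p
  %v = insertelement <2 x double> poison, double %x, i64 0
  %s = shufflevector <2 x double> %v, <2 x double> poison, <2 x i32> zeroinitializer
  ret <2 x double> %s
}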

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
Lines changed: 150 additions & 96 deletions

@@ -1137,37 +1137,67 @@ define <32 x double> @buildvec_v32f64(double %e0, double %e1, double %e2, double
 define <32 x double> @buildvec_v32f64_exact_vlen(double %e0, double %e1, double %e2, double %e3, double %e4, double %e5, double %e6, double %e7, double %e8, double %e9, double %e10, double %e11, double %e12, double %e13, double %e14, double %e15, double %e16, double %e17, double %e18, double %e19, double %e20, double %e21, double %e22, double %e23, double %e24, double %e25, double %e26, double %e27, double %e28, double %e29, double %e30, double %e31) vscale_range(2,2) {
 ; RV32-LABEL: buildvec_v32f64_exact_vlen:
 ; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -32
-; RV32-NEXT: .cfi_def_cfa_offset 32
-; RV32-NEXT: fsd fs0, 24(sp) # 8-byte Folded Spill
-; RV32-NEXT: fsd fs1, 16(sp) # 8-byte Folded Spill
+; RV32-NEXT: addi sp, sp, -112
+; RV32-NEXT: .cfi_def_cfa_offset 112
+; RV32-NEXT: fsd fs0, 104(sp) # 8-byte Folded Spill
+; RV32-NEXT: fsd fs1, 96(sp) # 8-byte Folded Spill
+; RV32-NEXT: fsd fs2, 88(sp) # 8-byte Folded Spill
+; RV32-NEXT: fsd fs3, 80(sp) # 8-byte Folded Spill
+; RV32-NEXT: fsd fs4, 72(sp) # 8-byte Folded Spill
+; RV32-NEXT: fsd fs5, 64(sp) # 8-byte Folded Spill
+; RV32-NEXT: fsd fs6, 56(sp) # 8-byte Folded Spill
+; RV32-NEXT: fsd fs7, 48(sp) # 8-byte Folded Spill
+; RV32-NEXT: fsd fs8, 40(sp) # 8-byte Folded Spill
+; RV32-NEXT: fsd fs9, 32(sp) # 8-byte Folded Spill
+; RV32-NEXT: fsd fs10, 24(sp) # 8-byte Folded Spill
+; RV32-NEXT: fsd fs11, 16(sp) # 8-byte Folded Spill
 ; RV32-NEXT: .cfi_offset fs0, -8
 ; RV32-NEXT: .cfi_offset fs1, -16
+; RV32-NEXT: .cfi_offset fs2, -24
+; RV32-NEXT: .cfi_offset fs3, -32
+; RV32-NEXT: .cfi_offset fs4, -40
+; RV32-NEXT: .cfi_offset fs5, -48
+; RV32-NEXT: .cfi_offset fs6, -56
+; RV32-NEXT: .cfi_offset fs7, -64
+; RV32-NEXT: .cfi_offset fs8, -72
+; RV32-NEXT: .cfi_offset fs9, -80
+; RV32-NEXT: .cfi_offset fs10, -88
+; RV32-NEXT: .cfi_offset fs11, -96
 ; RV32-NEXT: sw a6, 8(sp)
 ; RV32-NEXT: sw a7, 12(sp)
-; RV32-NEXT: fld ft4, 8(sp)
+; RV32-NEXT: fld ft6, 8(sp)
 ; RV32-NEXT: sw a4, 8(sp)
 ; RV32-NEXT: sw a5, 12(sp)
-; RV32-NEXT: fld ft5, 8(sp)
+; RV32-NEXT: fld ft7, 8(sp)
 ; RV32-NEXT: sw a2, 8(sp)
 ; RV32-NEXT: sw a3, 12(sp)
-; RV32-NEXT: fld ft6, 8(sp)
+; RV32-NEXT: fld ft8, 8(sp)
 ; RV32-NEXT: sw a0, 8(sp)
 ; RV32-NEXT: sw a1, 12(sp)
-; RV32-NEXT: fld ft7, 8(sp)
-; RV32-NEXT: fld ft0, 184(sp)
-; RV32-NEXT: fld ft1, 168(sp)
-; RV32-NEXT: fld ft2, 152(sp)
-; RV32-NEXT: fld ft3, 136(sp)
-; RV32-NEXT: fld ft8, 120(sp)
-; RV32-NEXT: fld ft9, 104(sp)
-; RV32-NEXT: fld ft10, 72(sp)
-; RV32-NEXT: fld ft11, 88(sp)
-; RV32-NEXT: fld fs0, 56(sp)
-; RV32-NEXT: fld fs1, 40(sp)
+; RV32-NEXT: fld ft9, 8(sp)
+; RV32-NEXT: fld ft0, 264(sp)
+; RV32-NEXT: fld ft1, 256(sp)
+; RV32-NEXT: fld ft2, 248(sp)
+; RV32-NEXT: fld ft3, 240(sp)
+; RV32-NEXT: fld ft4, 232(sp)
+; RV32-NEXT: fld ft5, 224(sp)
+; RV32-NEXT: fld ft10, 216(sp)
+; RV32-NEXT: fld ft11, 208(sp)
+; RV32-NEXT: fld fs0, 200(sp)
+; RV32-NEXT: fld fs1, 192(sp)
+; RV32-NEXT: fld fs2, 184(sp)
+; RV32-NEXT: fld fs3, 176(sp)
+; RV32-NEXT: fld fs4, 152(sp)
+; RV32-NEXT: fld fs5, 144(sp)
+; RV32-NEXT: fld fs6, 168(sp)
+; RV32-NEXT: fld fs7, 160(sp)
+; RV32-NEXT: fld fs8, 136(sp)
+; RV32-NEXT: fld fs9, 128(sp)
+; RV32-NEXT: fld fs10, 120(sp)
+; RV32-NEXT: fld fs11, 112(sp)
 ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; RV32-NEXT: vfmv.v.f v8, ft7
-; RV32-NEXT: vfslide1down.vf v12, v8, ft6
+; RV32-NEXT: vfmv.v.f v8, ft9
+; RV32-NEXT: vfslide1down.vf v12, v8, ft8
 ; RV32-NEXT: vfmv.v.f v8, fa2
 ; RV32-NEXT: vfslide1down.vf v9, v8, fa3
 ; RV32-NEXT: vfmv.v.f v8, fa0
@@ -1176,55 +1206,71 @@ define <32 x double> @buildvec_v32f64_exact_vlen(double %e0, double %e1, double
 ; RV32-NEXT: vfslide1down.vf v10, v10, fa5
 ; RV32-NEXT: vfmv.v.f v11, fa6
 ; RV32-NEXT: vfslide1down.vf v11, v11, fa7
-; RV32-NEXT: addi a0, sp, 32
-; RV32-NEXT: vlse64.v v14, (a0), zero
-; RV32-NEXT: addi a0, sp, 48
-; RV32-NEXT: vlse64.v v15, (a0), zero
-; RV32-NEXT: vfmv.v.f v13, ft5
-; RV32-NEXT: vfslide1down.vf v13, v13, ft4
-; RV32-NEXT: vfslide1down.vf v14, v14, fs1
-; RV32-NEXT: vfslide1down.vf v15, v15, fs0
-; RV32-NEXT: addi a0, sp, 80
-; RV32-NEXT: vlse64.v v16, (a0), zero
-; RV32-NEXT: addi a0, sp, 64
-; RV32-NEXT: vlse64.v v18, (a0), zero
-; RV32-NEXT: addi a0, sp, 96
-; RV32-NEXT: vlse64.v v19, (a0), zero
-; RV32-NEXT: addi a0, sp, 112
-; RV32-NEXT: vlse64.v v20, (a0), zero
-; RV32-NEXT: vfslide1down.vf v17, v16, ft11
-; RV32-NEXT: vfslide1down.vf v16, v18, ft10
-; RV32-NEXT: vfslide1down.vf v18, v19, ft9
-; RV32-NEXT: vfslide1down.vf v19, v20, ft8
-; RV32-NEXT: addi a0, sp, 128
-; RV32-NEXT: vlse64.v v20, (a0), zero
-; RV32-NEXT: addi a0, sp, 144
-; RV32-NEXT: vlse64.v v21, (a0), zero
-; RV32-NEXT: addi a0, sp, 160
-; RV32-NEXT: vlse64.v v22, (a0), zero
-; RV32-NEXT: addi a0, sp, 176
-; RV32-NEXT: vlse64.v v23, (a0), zero
-; RV32-NEXT: vfslide1down.vf v20, v20, ft3
-; RV32-NEXT: vfslide1down.vf v21, v21, ft2
-; RV32-NEXT: vfslide1down.vf v22, v22, ft1
+; RV32-NEXT: vfmv.v.f v13, ft7
+; RV32-NEXT: vfslide1down.vf v13, v13, ft6
+; RV32-NEXT: vfmv.v.f v14, fs11
+; RV32-NEXT: vfslide1down.vf v14, v14, fs10
+; RV32-NEXT: vfmv.v.f v15, fs9
+; RV32-NEXT: vfslide1down.vf v15, v15, fs8
+; RV32-NEXT: vfmv.v.f v16, fs7
+; RV32-NEXT: vfslide1down.vf v17, v16, fs6
+; RV32-NEXT: vfmv.v.f v16, fs5
+; RV32-NEXT: vfslide1down.vf v16, v16, fs4
+; RV32-NEXT: vfmv.v.f v18, fs3
+; RV32-NEXT: vfslide1down.vf v18, v18, fs2
+; RV32-NEXT: vfmv.v.f v19, fs1
+; RV32-NEXT: vfslide1down.vf v19, v19, fs0
+; RV32-NEXT: vfmv.v.f v20, ft11
+; RV32-NEXT: vfslide1down.vf v20, v20, ft10
+; RV32-NEXT: vfmv.v.f v21, ft5
+; RV32-NEXT: vfslide1down.vf v21, v21, ft4
+; RV32-NEXT: vfmv.v.f v22, ft3
+; RV32-NEXT: vfslide1down.vf v22, v22, ft2
+; RV32-NEXT: vfmv.v.f v23, ft1
 ; RV32-NEXT: vfslide1down.vf v23, v23, ft0
-; RV32-NEXT: fld fs0, 24(sp) # 8-byte Folded Reload
-; RV32-NEXT: fld fs1, 16(sp) # 8-byte Folded Reload
-; RV32-NEXT: addi sp, sp, 32
+; RV32-NEXT: fld fs0, 104(sp) # 8-byte Folded Reload
+; RV32-NEXT: fld fs1, 96(sp) # 8-byte Folded Reload
+; RV32-NEXT: fld fs2, 88(sp) # 8-byte Folded Reload
+; RV32-NEXT: fld fs3, 80(sp) # 8-byte Folded Reload
+; RV32-NEXT: fld fs4, 72(sp) # 8-byte Folded Reload
+; RV32-NEXT: fld fs5, 64(sp) # 8-byte Folded Reload
+; RV32-NEXT: fld fs6, 56(sp) # 8-byte Folded Reload
+; RV32-NEXT: fld fs7, 48(sp) # 8-byte Folded Reload
+; RV32-NEXT: fld fs8, 40(sp) # 8-byte Folded Reload
+; RV32-NEXT: fld fs9, 32(sp) # 8-byte Folded Reload
+; RV32-NEXT: fld fs10, 24(sp) # 8-byte Folded Reload
+; RV32-NEXT: fld fs11, 16(sp) # 8-byte Folded Reload
+; RV32-NEXT: addi sp, sp, 112
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: buildvec_v32f64_exact_vlen:
 ; RV64: # %bb.0:
-; RV64-NEXT: addi sp, sp, -32
-; RV64-NEXT: .cfi_def_cfa_offset 32
-; RV64-NEXT: fsd fs0, 24(sp) # 8-byte Folded Spill
-; RV64-NEXT: fsd fs1, 16(sp) # 8-byte Folded Spill
-; RV64-NEXT: fsd fs2, 8(sp) # 8-byte Folded Spill
-; RV64-NEXT: fsd fs3, 0(sp) # 8-byte Folded Spill
+; RV64-NEXT: addi sp, sp, -96
+; RV64-NEXT: .cfi_def_cfa_offset 96
+; RV64-NEXT: fsd fs0, 88(sp) # 8-byte Folded Spill
+; RV64-NEXT: fsd fs1, 80(sp) # 8-byte Folded Spill
+; RV64-NEXT: fsd fs2, 72(sp) # 8-byte Folded Spill
+; RV64-NEXT: fsd fs3, 64(sp) # 8-byte Folded Spill
+; RV64-NEXT: fsd fs4, 56(sp) # 8-byte Folded Spill
+; RV64-NEXT: fsd fs5, 48(sp) # 8-byte Folded Spill
+; RV64-NEXT: fsd fs6, 40(sp) # 8-byte Folded Spill
+; RV64-NEXT: fsd fs7, 32(sp) # 8-byte Folded Spill
+; RV64-NEXT: fsd fs8, 24(sp) # 8-byte Folded Spill
+; RV64-NEXT: fsd fs9, 16(sp) # 8-byte Folded Spill
+; RV64-NEXT: fsd fs10, 8(sp) # 8-byte Folded Spill
+; RV64-NEXT: fsd fs11, 0(sp) # 8-byte Folded Spill
 ; RV64-NEXT: .cfi_offset fs0, -8
 ; RV64-NEXT: .cfi_offset fs1, -16
 ; RV64-NEXT: .cfi_offset fs2, -24
 ; RV64-NEXT: .cfi_offset fs3, -32
+; RV64-NEXT: .cfi_offset fs4, -40
+; RV64-NEXT: .cfi_offset fs5, -48
+; RV64-NEXT: .cfi_offset fs6, -56
+; RV64-NEXT: .cfi_offset fs7, -64
+; RV64-NEXT: .cfi_offset fs8, -72
+; RV64-NEXT: .cfi_offset fs9, -80
+; RV64-NEXT: .cfi_offset fs10, -88
+; RV64-NEXT: .cfi_offset fs11, -96
 ; RV64-NEXT: fmv.d.x ft4, a7
 ; RV64-NEXT: fmv.d.x ft5, a6
 ; RV64-NEXT: fmv.d.x ft6, a5
@@ -1233,14 +1279,22 @@ define <32 x double> @buildvec_v32f64_exact_vlen(double %e0, double %e1, double
 ; RV64-NEXT: fmv.d.x ft9, a2
 ; RV64-NEXT: fmv.d.x ft10, a1
 ; RV64-NEXT: fmv.d.x ft11, a0
-; RV64-NEXT: fld ft0, 152(sp)
-; RV64-NEXT: fld ft1, 136(sp)
-; RV64-NEXT: fld ft2, 120(sp)
-; RV64-NEXT: fld ft3, 104(sp)
-; RV64-NEXT: fld fs0, 88(sp)
-; RV64-NEXT: fld fs1, 72(sp)
-; RV64-NEXT: fld fs2, 40(sp)
-; RV64-NEXT: fld fs3, 56(sp)
+; RV64-NEXT: fld ft0, 216(sp)
+; RV64-NEXT: fld ft1, 208(sp)
+; RV64-NEXT: fld ft2, 200(sp)
+; RV64-NEXT: fld ft3, 192(sp)
+; RV64-NEXT: fld fs0, 184(sp)
+; RV64-NEXT: fld fs1, 176(sp)
+; RV64-NEXT: fld fs2, 168(sp)
+; RV64-NEXT: fld fs3, 160(sp)
+; RV64-NEXT: fld fs4, 152(sp)
+; RV64-NEXT: fld fs5, 144(sp)
+; RV64-NEXT: fld fs6, 136(sp)
+; RV64-NEXT: fld fs7, 128(sp)
+; RV64-NEXT: fld fs8, 104(sp)
+; RV64-NEXT: fld fs9, 96(sp)
+; RV64-NEXT: fld fs10, 120(sp)
+; RV64-NEXT: fld fs11, 112(sp)
 ; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
 ; RV64-NEXT: vfmv.v.f v8, fa2
 ; RV64-NEXT: vfslide1down.vf v9, v8, fa3
@@ -1258,35 +1312,35 @@ define <32 x double> @buildvec_v32f64_exact_vlen(double %e0, double %e1, double
 ; RV64-NEXT: vfslide1down.vf v14, v14, ft6
 ; RV64-NEXT: vfmv.v.f v15, ft5
 ; RV64-NEXT: vfslide1down.vf v15, v15, ft4
-; RV64-NEXT: addi a0, sp, 48
-; RV64-NEXT: vlse64.v v16, (a0), zero
-; RV64-NEXT: addi a0, sp, 32
-; RV64-NEXT: vlse64.v v18, (a0), zero
-; RV64-NEXT: addi a0, sp, 64
-; RV64-NEXT: vlse64.v v19, (a0), zero
-; RV64-NEXT: addi a0, sp, 80
-; RV64-NEXT: vlse64.v v20, (a0), zero
-; RV64-NEXT: vfslide1down.vf v17, v16, fs3
-; RV64-NEXT: vfslide1down.vf v16, v18, fs2
-; RV64-NEXT: vfslide1down.vf v18, v19, fs1
-; RV64-NEXT: vfslide1down.vf v19, v20, fs0
-; RV64-NEXT: addi a0, sp, 96
-; RV64-NEXT: vlse64.v v20, (a0), zero
-; RV64-NEXT: addi a0, sp, 112
-; RV64-NEXT: vlse64.v v21, (a0), zero
-; RV64-NEXT: addi a0, sp, 128
-; RV64-NEXT: vlse64.v v22, (a0), zero
-; RV64-NEXT: addi a0, sp, 144
-; RV64-NEXT: vlse64.v v23, (a0), zero
-; RV64-NEXT: vfslide1down.vf v20, v20, ft3
-; RV64-NEXT: vfslide1down.vf v21, v21, ft2
-; RV64-NEXT: vfslide1down.vf v22, v22, ft1
+; RV64-NEXT: vfmv.v.f v16, fs11
+; RV64-NEXT: vfslide1down.vf v17, v16, fs10
+; RV64-NEXT: vfmv.v.f v16, fs9
+; RV64-NEXT: vfslide1down.vf v16, v16, fs8
+; RV64-NEXT: vfmv.v.f v18, fs7
+; RV64-NEXT: vfslide1down.vf v18, v18, fs6
+; RV64-NEXT: vfmv.v.f v19, fs5
+; RV64-NEXT: vfslide1down.vf v19, v19, fs4
+; RV64-NEXT: vfmv.v.f v20, fs3
+; RV64-NEXT: vfslide1down.vf v20, v20, fs2
+; RV64-NEXT: vfmv.v.f v21, fs1
+; RV64-NEXT: vfslide1down.vf v21, v21, fs0
+; RV64-NEXT: vfmv.v.f v22, ft3
+; RV64-NEXT: vfslide1down.vf v22, v22, ft2
+; RV64-NEXT: vfmv.v.f v23, ft1
 ; RV64-NEXT: vfslide1down.vf v23, v23, ft0
-; RV64-NEXT: fld fs0, 24(sp) # 8-byte Folded Reload
-; RV64-NEXT: fld fs1, 16(sp) # 8-byte Folded Reload
-; RV64-NEXT: fld fs2, 8(sp) # 8-byte Folded Reload
-; RV64-NEXT: fld fs3, 0(sp) # 8-byte Folded Reload
-; RV64-NEXT: addi sp, sp, 32
+; RV64-NEXT: fld fs0, 88(sp) # 8-byte Folded Reload
+; RV64-NEXT: fld fs1, 80(sp) # 8-byte Folded Reload
+; RV64-NEXT: fld fs2, 72(sp) # 8-byte Folded Reload
+; RV64-NEXT: fld fs3, 64(sp) # 8-byte Folded Reload
+; RV64-NEXT: fld fs4, 56(sp) # 8-byte Folded Reload
+; RV64-NEXT: fld fs5, 48(sp) # 8-byte Folded Reload
+; RV64-NEXT: fld fs6, 40(sp) # 8-byte Folded Reload
+; RV64-NEXT: fld fs7, 32(sp) # 8-byte Folded Reload
+; RV64-NEXT: fld fs8, 24(sp) # 8-byte Folded Reload
+; RV64-NEXT: fld fs9, 16(sp) # 8-byte Folded Reload
+; RV64-NEXT: fld fs10, 8(sp) # 8-byte Folded Reload
+; RV64-NEXT: fld fs11, 0(sp) # 8-byte Folded Reload
+; RV64-NEXT: addi sp, sp, 96
 ; RV64-NEXT: ret
 %v0 = insertelement <32 x double> poison, double %e0, i64 0
 %v1 = insertelement <32 x double> %v0, double %e1, i64 1
