@@ -1137,37 +1137,67 @@ define <32 x double> @buildvec_v32f64(double %e0, double %e1, double %e2, double
define <32 x double> @buildvec_v32f64_exact_vlen(double %e0, double %e1, double %e2, double %e3, double %e4, double %e5, double %e6, double %e7, double %e8, double %e9, double %e10, double %e11, double %e12, double %e13, double %e14, double %e15, double %e16, double %e17, double %e18, double %e19, double %e20, double %e21, double %e22, double %e23, double %e24, double %e25, double %e26, double %e27, double %e28, double %e29, double %e30, double %e31) vscale_range(2,2) {
; RV32-LABEL: buildvec_v32f64_exact_vlen:
; RV32: # %bb.0:
- ; RV32-NEXT: addi sp, sp, -32
- ; RV32-NEXT: .cfi_def_cfa_offset 32
- ; RV32-NEXT: fsd fs0, 24(sp) # 8-byte Folded Spill
- ; RV32-NEXT: fsd fs1, 16(sp) # 8-byte Folded Spill
+ ; RV32-NEXT: addi sp, sp, -112
+ ; RV32-NEXT: .cfi_def_cfa_offset 112
+ ; RV32-NEXT: fsd fs0, 104(sp) # 8-byte Folded Spill
+ ; RV32-NEXT: fsd fs1, 96(sp) # 8-byte Folded Spill
+ ; RV32-NEXT: fsd fs2, 88(sp) # 8-byte Folded Spill
+ ; RV32-NEXT: fsd fs3, 80(sp) # 8-byte Folded Spill
+ ; RV32-NEXT: fsd fs4, 72(sp) # 8-byte Folded Spill
+ ; RV32-NEXT: fsd fs5, 64(sp) # 8-byte Folded Spill
+ ; RV32-NEXT: fsd fs6, 56(sp) # 8-byte Folded Spill
+ ; RV32-NEXT: fsd fs7, 48(sp) # 8-byte Folded Spill
+ ; RV32-NEXT: fsd fs8, 40(sp) # 8-byte Folded Spill
+ ; RV32-NEXT: fsd fs9, 32(sp) # 8-byte Folded Spill
+ ; RV32-NEXT: fsd fs10, 24(sp) # 8-byte Folded Spill
+ ; RV32-NEXT: fsd fs11, 16(sp) # 8-byte Folded Spill
; RV32-NEXT: .cfi_offset fs0, -8
; RV32-NEXT: .cfi_offset fs1, -16
+ ; RV32-NEXT: .cfi_offset fs2, -24
+ ; RV32-NEXT: .cfi_offset fs3, -32
+ ; RV32-NEXT: .cfi_offset fs4, -40
+ ; RV32-NEXT: .cfi_offset fs5, -48
+ ; RV32-NEXT: .cfi_offset fs6, -56
+ ; RV32-NEXT: .cfi_offset fs7, -64
+ ; RV32-NEXT: .cfi_offset fs8, -72
+ ; RV32-NEXT: .cfi_offset fs9, -80
+ ; RV32-NEXT: .cfi_offset fs10, -88
+ ; RV32-NEXT: .cfi_offset fs11, -96
; RV32-NEXT: sw a6, 8(sp)
; RV32-NEXT: sw a7, 12(sp)
- ; RV32-NEXT: fld ft4, 8(sp)
+ ; RV32-NEXT: fld ft6, 8(sp)
; RV32-NEXT: sw a4, 8(sp)
; RV32-NEXT: sw a5, 12(sp)
- ; RV32-NEXT: fld ft5, 8(sp)
+ ; RV32-NEXT: fld ft7, 8(sp)
; RV32-NEXT: sw a2, 8(sp)
; RV32-NEXT: sw a3, 12(sp)
- ; RV32-NEXT: fld ft6, 8(sp)
+ ; RV32-NEXT: fld ft8, 8(sp)
; RV32-NEXT: sw a0, 8(sp)
; RV32-NEXT: sw a1, 12(sp)
- ; RV32-NEXT: fld ft7, 8(sp)
- ; RV32-NEXT: fld ft0, 184(sp)
- ; RV32-NEXT: fld ft1, 168(sp)
- ; RV32-NEXT: fld ft2, 152(sp)
- ; RV32-NEXT: fld ft3, 136(sp)
- ; RV32-NEXT: fld ft8, 120(sp)
- ; RV32-NEXT: fld ft9, 104(sp)
- ; RV32-NEXT: fld ft10, 72(sp)
- ; RV32-NEXT: fld ft11, 88(sp)
- ; RV32-NEXT: fld fs0, 56(sp)
- ; RV32-NEXT: fld fs1, 40(sp)
+ ; RV32-NEXT: fld ft9, 8(sp)
+ ; RV32-NEXT: fld ft0, 264(sp)
+ ; RV32-NEXT: fld ft1, 256(sp)
+ ; RV32-NEXT: fld ft2, 248(sp)
+ ; RV32-NEXT: fld ft3, 240(sp)
+ ; RV32-NEXT: fld ft4, 232(sp)
+ ; RV32-NEXT: fld ft5, 224(sp)
+ ; RV32-NEXT: fld ft10, 216(sp)
+ ; RV32-NEXT: fld ft11, 208(sp)
+ ; RV32-NEXT: fld fs0, 200(sp)
+ ; RV32-NEXT: fld fs1, 192(sp)
+ ; RV32-NEXT: fld fs2, 184(sp)
+ ; RV32-NEXT: fld fs3, 176(sp)
+ ; RV32-NEXT: fld fs4, 152(sp)
+ ; RV32-NEXT: fld fs5, 144(sp)
+ ; RV32-NEXT: fld fs6, 168(sp)
+ ; RV32-NEXT: fld fs7, 160(sp)
+ ; RV32-NEXT: fld fs8, 136(sp)
+ ; RV32-NEXT: fld fs9, 128(sp)
+ ; RV32-NEXT: fld fs10, 120(sp)
+ ; RV32-NEXT: fld fs11, 112(sp)
; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
- ; RV32-NEXT: vfmv.v.f v8, ft7
- ; RV32-NEXT: vfslide1down.vf v12, v8, ft6
+ ; RV32-NEXT: vfmv.v.f v8, ft9
+ ; RV32-NEXT: vfslide1down.vf v12, v8, ft8
; RV32-NEXT: vfmv.v.f v8, fa2
; RV32-NEXT: vfslide1down.vf v9, v8, fa3
; RV32-NEXT: vfmv.v.f v8, fa0
@@ -1176,55 +1206,71 @@ define <32 x double> @buildvec_v32f64_exact_vlen(double %e0, double %e1, double
; RV32-NEXT: vfslide1down.vf v10, v10, fa5
; RV32-NEXT: vfmv.v.f v11, fa6
; RV32-NEXT: vfslide1down.vf v11, v11, fa7
- ; RV32-NEXT: addi a0, sp, 32
- ; RV32-NEXT: vlse64.v v14, (a0), zero
- ; RV32-NEXT: addi a0, sp, 48
- ; RV32-NEXT: vlse64.v v15, (a0), zero
- ; RV32-NEXT: vfmv.v.f v13, ft5
- ; RV32-NEXT: vfslide1down.vf v13, v13, ft4
- ; RV32-NEXT: vfslide1down.vf v14, v14, fs1
- ; RV32-NEXT: vfslide1down.vf v15, v15, fs0
- ; RV32-NEXT: addi a0, sp, 80
- ; RV32-NEXT: vlse64.v v16, (a0), zero
- ; RV32-NEXT: addi a0, sp, 64
- ; RV32-NEXT: vlse64.v v18, (a0), zero
- ; RV32-NEXT: addi a0, sp, 96
- ; RV32-NEXT: vlse64.v v19, (a0), zero
- ; RV32-NEXT: addi a0, sp, 112
- ; RV32-NEXT: vlse64.v v20, (a0), zero
- ; RV32-NEXT: vfslide1down.vf v17, v16, ft11
- ; RV32-NEXT: vfslide1down.vf v16, v18, ft10
- ; RV32-NEXT: vfslide1down.vf v18, v19, ft9
- ; RV32-NEXT: vfslide1down.vf v19, v20, ft8
- ; RV32-NEXT: addi a0, sp, 128
- ; RV32-NEXT: vlse64.v v20, (a0), zero
- ; RV32-NEXT: addi a0, sp, 144
- ; RV32-NEXT: vlse64.v v21, (a0), zero
- ; RV32-NEXT: addi a0, sp, 160
- ; RV32-NEXT: vlse64.v v22, (a0), zero
- ; RV32-NEXT: addi a0, sp, 176
- ; RV32-NEXT: vlse64.v v23, (a0), zero
- ; RV32-NEXT: vfslide1down.vf v20, v20, ft3
- ; RV32-NEXT: vfslide1down.vf v21, v21, ft2
- ; RV32-NEXT: vfslide1down.vf v22, v22, ft1
+ ; RV32-NEXT: vfmv.v.f v13, ft7
+ ; RV32-NEXT: vfslide1down.vf v13, v13, ft6
+ ; RV32-NEXT: vfmv.v.f v14, fs11
+ ; RV32-NEXT: vfslide1down.vf v14, v14, fs10
+ ; RV32-NEXT: vfmv.v.f v15, fs9
+ ; RV32-NEXT: vfslide1down.vf v15, v15, fs8
+ ; RV32-NEXT: vfmv.v.f v16, fs7
+ ; RV32-NEXT: vfslide1down.vf v17, v16, fs6
+ ; RV32-NEXT: vfmv.v.f v16, fs5
+ ; RV32-NEXT: vfslide1down.vf v16, v16, fs4
+ ; RV32-NEXT: vfmv.v.f v18, fs3
+ ; RV32-NEXT: vfslide1down.vf v18, v18, fs2
+ ; RV32-NEXT: vfmv.v.f v19, fs1
+ ; RV32-NEXT: vfslide1down.vf v19, v19, fs0
+ ; RV32-NEXT: vfmv.v.f v20, ft11
+ ; RV32-NEXT: vfslide1down.vf v20, v20, ft10
+ ; RV32-NEXT: vfmv.v.f v21, ft5
+ ; RV32-NEXT: vfslide1down.vf v21, v21, ft4
+ ; RV32-NEXT: vfmv.v.f v22, ft3
+ ; RV32-NEXT: vfslide1down.vf v22, v22, ft2
+ ; RV32-NEXT: vfmv.v.f v23, ft1
; RV32-NEXT: vfslide1down.vf v23, v23, ft0
- ; RV32-NEXT: fld fs0, 24(sp) # 8-byte Folded Reload
- ; RV32-NEXT: fld fs1, 16(sp) # 8-byte Folded Reload
- ; RV32-NEXT: addi sp, sp, 32
+ ; RV32-NEXT: fld fs0, 104(sp) # 8-byte Folded Reload
+ ; RV32-NEXT: fld fs1, 96(sp) # 8-byte Folded Reload
+ ; RV32-NEXT: fld fs2, 88(sp) # 8-byte Folded Reload
+ ; RV32-NEXT: fld fs3, 80(sp) # 8-byte Folded Reload
+ ; RV32-NEXT: fld fs4, 72(sp) # 8-byte Folded Reload
+ ; RV32-NEXT: fld fs5, 64(sp) # 8-byte Folded Reload
+ ; RV32-NEXT: fld fs6, 56(sp) # 8-byte Folded Reload
+ ; RV32-NEXT: fld fs7, 48(sp) # 8-byte Folded Reload
+ ; RV32-NEXT: fld fs8, 40(sp) # 8-byte Folded Reload
+ ; RV32-NEXT: fld fs9, 32(sp) # 8-byte Folded Reload
+ ; RV32-NEXT: fld fs10, 24(sp) # 8-byte Folded Reload
+ ; RV32-NEXT: fld fs11, 16(sp) # 8-byte Folded Reload
+ ; RV32-NEXT: addi sp, sp, 112
; RV32-NEXT: ret
;
; RV64-LABEL: buildvec_v32f64_exact_vlen:
; RV64: # %bb.0:
- ; RV64-NEXT: addi sp, sp, -32
- ; RV64-NEXT: .cfi_def_cfa_offset 32
- ; RV64-NEXT: fsd fs0, 24(sp) # 8-byte Folded Spill
- ; RV64-NEXT: fsd fs1, 16(sp) # 8-byte Folded Spill
- ; RV64-NEXT: fsd fs2, 8(sp) # 8-byte Folded Spill
- ; RV64-NEXT: fsd fs3, 0(sp) # 8-byte Folded Spill
+ ; RV64-NEXT: addi sp, sp, -96
+ ; RV64-NEXT: .cfi_def_cfa_offset 96
+ ; RV64-NEXT: fsd fs0, 88(sp) # 8-byte Folded Spill
+ ; RV64-NEXT: fsd fs1, 80(sp) # 8-byte Folded Spill
+ ; RV64-NEXT: fsd fs2, 72(sp) # 8-byte Folded Spill
+ ; RV64-NEXT: fsd fs3, 64(sp) # 8-byte Folded Spill
+ ; RV64-NEXT: fsd fs4, 56(sp) # 8-byte Folded Spill
+ ; RV64-NEXT: fsd fs5, 48(sp) # 8-byte Folded Spill
+ ; RV64-NEXT: fsd fs6, 40(sp) # 8-byte Folded Spill
+ ; RV64-NEXT: fsd fs7, 32(sp) # 8-byte Folded Spill
+ ; RV64-NEXT: fsd fs8, 24(sp) # 8-byte Folded Spill
+ ; RV64-NEXT: fsd fs9, 16(sp) # 8-byte Folded Spill
+ ; RV64-NEXT: fsd fs10, 8(sp) # 8-byte Folded Spill
+ ; RV64-NEXT: fsd fs11, 0(sp) # 8-byte Folded Spill
; RV64-NEXT: .cfi_offset fs0, -8
; RV64-NEXT: .cfi_offset fs1, -16
; RV64-NEXT: .cfi_offset fs2, -24
; RV64-NEXT: .cfi_offset fs3, -32
+ ; RV64-NEXT: .cfi_offset fs4, -40
+ ; RV64-NEXT: .cfi_offset fs5, -48
+ ; RV64-NEXT: .cfi_offset fs6, -56
+ ; RV64-NEXT: .cfi_offset fs7, -64
+ ; RV64-NEXT: .cfi_offset fs8, -72
+ ; RV64-NEXT: .cfi_offset fs9, -80
+ ; RV64-NEXT: .cfi_offset fs10, -88
+ ; RV64-NEXT: .cfi_offset fs11, -96
; RV64-NEXT: fmv.d.x ft4, a7
; RV64-NEXT: fmv.d.x ft5, a6
; RV64-NEXT: fmv.d.x ft6, a5
@@ -1233,14 +1279,22 @@ define <32 x double> @buildvec_v32f64_exact_vlen(double %e0, double %e1, double
; RV64-NEXT: fmv.d.x ft9, a2
; RV64-NEXT: fmv.d.x ft10, a1
; RV64-NEXT: fmv.d.x ft11, a0
- ; RV64-NEXT: fld ft0, 152(sp)
- ; RV64-NEXT: fld ft1, 136(sp)
- ; RV64-NEXT: fld ft2, 120(sp)
- ; RV64-NEXT: fld ft3, 104(sp)
- ; RV64-NEXT: fld fs0, 88(sp)
- ; RV64-NEXT: fld fs1, 72(sp)
- ; RV64-NEXT: fld fs2, 40(sp)
- ; RV64-NEXT: fld fs3, 56(sp)
+ ; RV64-NEXT: fld ft0, 216(sp)
+ ; RV64-NEXT: fld ft1, 208(sp)
+ ; RV64-NEXT: fld ft2, 200(sp)
+ ; RV64-NEXT: fld ft3, 192(sp)
+ ; RV64-NEXT: fld fs0, 184(sp)
+ ; RV64-NEXT: fld fs1, 176(sp)
+ ; RV64-NEXT: fld fs2, 168(sp)
+ ; RV64-NEXT: fld fs3, 160(sp)
+ ; RV64-NEXT: fld fs4, 152(sp)
+ ; RV64-NEXT: fld fs5, 144(sp)
+ ; RV64-NEXT: fld fs6, 136(sp)
+ ; RV64-NEXT: fld fs7, 128(sp)
+ ; RV64-NEXT: fld fs8, 104(sp)
+ ; RV64-NEXT: fld fs9, 96(sp)
+ ; RV64-NEXT: fld fs10, 120(sp)
+ ; RV64-NEXT: fld fs11, 112(sp)
; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV64-NEXT: vfmv.v.f v8, fa2
; RV64-NEXT: vfslide1down.vf v9, v8, fa3
@@ -1258,35 +1312,35 @@ define <32 x double> @buildvec_v32f64_exact_vlen(double %e0, double %e1, double
; RV64-NEXT: vfslide1down.vf v14, v14, ft6
; RV64-NEXT: vfmv.v.f v15, ft5
; RV64-NEXT: vfslide1down.vf v15, v15, ft4
- ; RV64-NEXT: addi a0, sp, 48
- ; RV64-NEXT: vlse64.v v16, (a0), zero
- ; RV64-NEXT: addi a0, sp, 32
- ; RV64-NEXT: vlse64.v v18, (a0), zero
- ; RV64-NEXT: addi a0, sp, 64
- ; RV64-NEXT: vlse64.v v19, (a0), zero
- ; RV64-NEXT: addi a0, sp, 80
- ; RV64-NEXT: vlse64.v v20, (a0), zero
- ; RV64-NEXT: vfslide1down.vf v17, v16, fs3
- ; RV64-NEXT: vfslide1down.vf v16, v18, fs2
- ; RV64-NEXT: vfslide1down.vf v18, v19, fs1
- ; RV64-NEXT: vfslide1down.vf v19, v20, fs0
- ; RV64-NEXT: addi a0, sp, 96
- ; RV64-NEXT: vlse64.v v20, (a0), zero
- ; RV64-NEXT: addi a0, sp, 112
- ; RV64-NEXT: vlse64.v v21, (a0), zero
- ; RV64-NEXT: addi a0, sp, 128
- ; RV64-NEXT: vlse64.v v22, (a0), zero
- ; RV64-NEXT: addi a0, sp, 144
- ; RV64-NEXT: vlse64.v v23, (a0), zero
- ; RV64-NEXT: vfslide1down.vf v20, v20, ft3
- ; RV64-NEXT: vfslide1down.vf v21, v21, ft2
- ; RV64-NEXT: vfslide1down.vf v22, v22, ft1
+ ; RV64-NEXT: vfmv.v.f v16, fs11
+ ; RV64-NEXT: vfslide1down.vf v17, v16, fs10
+ ; RV64-NEXT: vfmv.v.f v16, fs9
+ ; RV64-NEXT: vfslide1down.vf v16, v16, fs8
+ ; RV64-NEXT: vfmv.v.f v18, fs7
+ ; RV64-NEXT: vfslide1down.vf v18, v18, fs6
+ ; RV64-NEXT: vfmv.v.f v19, fs5
+ ; RV64-NEXT: vfslide1down.vf v19, v19, fs4
+ ; RV64-NEXT: vfmv.v.f v20, fs3
+ ; RV64-NEXT: vfslide1down.vf v20, v20, fs2
+ ; RV64-NEXT: vfmv.v.f v21, fs1
+ ; RV64-NEXT: vfslide1down.vf v21, v21, fs0
+ ; RV64-NEXT: vfmv.v.f v22, ft3
+ ; RV64-NEXT: vfslide1down.vf v22, v22, ft2
+ ; RV64-NEXT: vfmv.v.f v23, ft1
; RV64-NEXT: vfslide1down.vf v23, v23, ft0
- ; RV64-NEXT: fld fs0, 24(sp) # 8-byte Folded Reload
- ; RV64-NEXT: fld fs1, 16(sp) # 8-byte Folded Reload
- ; RV64-NEXT: fld fs2, 8(sp) # 8-byte Folded Reload
- ; RV64-NEXT: fld fs3, 0(sp) # 8-byte Folded Reload
- ; RV64-NEXT: addi sp, sp, 32
+ ; RV64-NEXT: fld fs0, 88(sp) # 8-byte Folded Reload
+ ; RV64-NEXT: fld fs1, 80(sp) # 8-byte Folded Reload
+ ; RV64-NEXT: fld fs2, 72(sp) # 8-byte Folded Reload
+ ; RV64-NEXT: fld fs3, 64(sp) # 8-byte Folded Reload
+ ; RV64-NEXT: fld fs4, 56(sp) # 8-byte Folded Reload
+ ; RV64-NEXT: fld fs5, 48(sp) # 8-byte Folded Reload
+ ; RV64-NEXT: fld fs6, 40(sp) # 8-byte Folded Reload
+ ; RV64-NEXT: fld fs7, 32(sp) # 8-byte Folded Reload
+ ; RV64-NEXT: fld fs8, 24(sp) # 8-byte Folded Reload
+ ; RV64-NEXT: fld fs9, 16(sp) # 8-byte Folded Reload
+ ; RV64-NEXT: fld fs10, 8(sp) # 8-byte Folded Reload
+ ; RV64-NEXT: fld fs11, 0(sp) # 8-byte Folded Reload
+ ; RV64-NEXT: addi sp, sp, 96
; RV64-NEXT: ret
%v0 = insertelement <32 x double> poison, double %e0, i64 0
%v1 = insertelement <32 x double> %v0, double %e1, i64 1