Skip to content

Commit a86ff34

Browse files
committed
[RISCV] Support saturated truncate
Add support for saturated truncate by implementing the following changes:
- Add `TRUNCATE_[SU]SAT_[SU]` to the Action target of `TRUNCATE`
- Add `TRUNCATE_[SU]SAT_[SU]` to the TargetLowering target of `TRUNCATE`
- Convert `TRUNCATE_SSAT_S` to `TRUNCATE_VECTOR_VL_SSAT`
- Convert `TRUNCATE_[SU]SAT_U` to `TRUNCATE_VECTOR_VL_USAT`
1 parent bc53a4d commit a86ff34

File tree

5 files changed

+80
-134
lines changed

5 files changed

+80
-134
lines changed

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 20 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -853,7 +853,9 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
853853

854854
// Integer VTs are lowered as a series of "RISCVISD::TRUNCATE_VECTOR_VL"
855855
// nodes which truncate by one power of two at a time.
856-
setOperationAction(ISD::TRUNCATE, VT, Custom);
856+
setOperationAction({ISD::TRUNCATE, ISD::TRUNCATE_SSAT_S,
857+
ISD::TRUNCATE_SSAT_U, ISD::TRUNCATE_USAT_U},
858+
VT, Custom);
857859

858860
// Custom-lower insert/extract operations to simplify patterns.
859861
setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT}, VT,
@@ -1168,7 +1170,9 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
11681170

11691171
setOperationAction(ISD::SELECT, VT, Custom);
11701172

1171-
setOperationAction(ISD::TRUNCATE, VT, Custom);
1173+
setOperationAction({ISD::TRUNCATE, ISD::TRUNCATE_SSAT_S,
1174+
ISD::TRUNCATE_SSAT_U, ISD::TRUNCATE_USAT_U},
1175+
VT, Custom);
11721176

11731177
setOperationAction(ISD::BITCAST, VT, Custom);
11741178

@@ -6395,6 +6399,9 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
63956399
return DAG.getNode(RISCVISD::BREV8, DL, VT, BSwap);
63966400
}
63976401
case ISD::TRUNCATE:
6402+
case ISD::TRUNCATE_SSAT_S:
6403+
case ISD::TRUNCATE_SSAT_U:
6404+
case ISD::TRUNCATE_USAT_U:
63986405
// Only custom-lower vector truncates
63996406
if (!Op.getSimpleValueType().isVector())
64006407
return Op;
@@ -8234,7 +8241,8 @@ SDValue RISCVTargetLowering::lowerVectorMaskTruncLike(SDValue Op,
82348241

82358242
SDValue RISCVTargetLowering::lowerVectorTruncLike(SDValue Op,
82368243
SelectionDAG &DAG) const {
8237-
bool IsVPTrunc = Op.getOpcode() == ISD::VP_TRUNCATE;
8244+
unsigned Opc = Op.getOpcode();
8245+
bool IsVPTrunc = Opc == ISD::VP_TRUNCATE;
82388246
SDLoc DL(Op);
82398247

82408248
MVT VT = Op.getSimpleValueType();
@@ -8279,11 +8287,18 @@ SDValue RISCVTargetLowering::lowerVectorTruncLike(SDValue Op,
82798287
getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
82808288
}
82818289

8290+
unsigned NewOpc;
8291+
if (Opc == ISD::TRUNCATE_SSAT_S)
8292+
NewOpc = RISCVISD::TRUNCATE_VECTOR_VL_SSAT;
8293+
else if (Opc == ISD::TRUNCATE_SSAT_U || Opc == ISD::TRUNCATE_USAT_U)
8294+
NewOpc = RISCVISD::TRUNCATE_VECTOR_VL_USAT;
8295+
else
8296+
NewOpc = RISCVISD::TRUNCATE_VECTOR_VL;
8297+
82828298
do {
82838299
SrcEltVT = MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2);
82848300
MVT ResultVT = ContainerVT.changeVectorElementType(SrcEltVT);
8285-
Result = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, ResultVT, Result,
8286-
Mask, VL);
8301+
Result = DAG.getNode(NewOpc, DL, ResultVT, Result, Mask, VL);
82878302
} while (SrcEltVT != DstEltVT);
82888303

82898304
if (SrcVT.isFixedLengthVector())

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-sat-clip.ll

Lines changed: 8 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -101,10 +101,8 @@ define void @trunc_sat_u8u16_notopt(ptr %x, ptr %y) {
101101
define void @trunc_sat_u8u16_maxmin(ptr %x, ptr %y) {
102102
; CHECK-LABEL: trunc_sat_u8u16_maxmin:
103103
; CHECK: # %bb.0:
104-
; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
104+
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
105105
; CHECK-NEXT: vle16.v v8, (a0)
106-
; CHECK-NEXT: vmax.vx v8, v8, zero
107-
; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
108106
; CHECK-NEXT: vnclipu.wi v8, v8, 0
109107
; CHECK-NEXT: vse8.v v8, (a1)
110108
; CHECK-NEXT: ret
@@ -119,10 +117,8 @@ define void @trunc_sat_u8u16_maxmin(ptr %x, ptr %y) {
119117
define void @trunc_sat_u8u16_minmax(ptr %x, ptr %y) {
120118
; CHECK-LABEL: trunc_sat_u8u16_minmax:
121119
; CHECK: # %bb.0:
122-
; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
120+
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
123121
; CHECK-NEXT: vle16.v v8, (a0)
124-
; CHECK-NEXT: vmax.vx v8, v8, zero
125-
; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
126122
; CHECK-NEXT: vnclipu.wi v8, v8, 0
127123
; CHECK-NEXT: vse8.v v8, (a1)
128124
; CHECK-NEXT: ret
@@ -356,10 +352,8 @@ define void @trunc_sat_u32u64_min(ptr %x, ptr %y) {
356352
define void @trunc_sat_u32u64_maxmin(ptr %x, ptr %y) {
357353
; CHECK-LABEL: trunc_sat_u32u64_maxmin:
358354
; CHECK: # %bb.0:
359-
; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
355+
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
360356
; CHECK-NEXT: vle64.v v8, (a0)
361-
; CHECK-NEXT: vmax.vx v8, v8, zero
362-
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
363357
; CHECK-NEXT: vnclipu.wi v10, v8, 0
364358
; CHECK-NEXT: vse32.v v10, (a1)
365359
; CHECK-NEXT: ret
@@ -374,10 +368,8 @@ define void @trunc_sat_u32u64_maxmin(ptr %x, ptr %y) {
374368
define void @trunc_sat_u32u64_minmax(ptr %x, ptr %y) {
375369
; CHECK-LABEL: trunc_sat_u32u64_minmax:
376370
; CHECK: # %bb.0:
377-
; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
371+
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
378372
; CHECK-NEXT: vle64.v v8, (a0)
379-
; CHECK-NEXT: vmax.vx v8, v8, zero
380-
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
381373
; CHECK-NEXT: vnclipu.wi v10, v8, 0
382374
; CHECK-NEXT: vse32.v v10, (a1)
383375
; CHECK-NEXT: ret
@@ -445,10 +437,8 @@ define void @trunc_sat_u8u32_min(ptr %x, ptr %y) {
445437
define void @trunc_sat_u8u32_maxmin(ptr %x, ptr %y) {
446438
; CHECK-LABEL: trunc_sat_u8u32_maxmin:
447439
; CHECK: # %bb.0:
448-
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
440+
; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
449441
; CHECK-NEXT: vle32.v v8, (a0)
450-
; CHECK-NEXT: vmax.vx v8, v8, zero
451-
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
452442
; CHECK-NEXT: vnclipu.wi v8, v8, 0
453443
; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
454444
; CHECK-NEXT: vnclipu.wi v8, v8, 0
@@ -465,10 +455,8 @@ define void @trunc_sat_u8u32_maxmin(ptr %x, ptr %y) {
465455
define void @trunc_sat_u8u32_minmax(ptr %x, ptr %y) {
466456
; CHECK-LABEL: trunc_sat_u8u32_minmax:
467457
; CHECK: # %bb.0:
468-
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
458+
; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
469459
; CHECK-NEXT: vle32.v v8, (a0)
470-
; CHECK-NEXT: vmax.vx v8, v8, zero
471-
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
472460
; CHECK-NEXT: vnclipu.wi v8, v8, 0
473461
; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
474462
; CHECK-NEXT: vnclipu.wi v8, v8, 0
@@ -544,10 +532,8 @@ define void @trunc_sat_u8u64_min(ptr %x, ptr %y) {
544532
define void @trunc_sat_u8u64_maxmin(ptr %x, ptr %y) {
545533
; CHECK-LABEL: trunc_sat_u8u64_maxmin:
546534
; CHECK: # %bb.0:
547-
; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
535+
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
548536
; CHECK-NEXT: vle64.v v8, (a0)
549-
; CHECK-NEXT: vmax.vx v8, v8, zero
550-
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
551537
; CHECK-NEXT: vnclipu.wi v10, v8, 0
552538
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
553539
; CHECK-NEXT: vnclipu.wi v8, v10, 0
@@ -566,10 +552,8 @@ define void @trunc_sat_u8u64_maxmin(ptr %x, ptr %y) {
566552
define void @trunc_sat_u8u64_minmax(ptr %x, ptr %y) {
567553
; CHECK-LABEL: trunc_sat_u8u64_minmax:
568554
; CHECK: # %bb.0:
569-
; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
555+
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
570556
; CHECK-NEXT: vle64.v v8, (a0)
571-
; CHECK-NEXT: vmax.vx v8, v8, zero
572-
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
573557
; CHECK-NEXT: vnclipu.wi v10, v8, 0
574558
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
575559
; CHECK-NEXT: vnclipu.wi v8, v10, 0

llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll

Lines changed: 26 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -113,7 +113,6 @@ define <2 x i32> @ustest_f64i32(<2 x double> %x) {
113113
; CHECK-V: # %bb.0: # %entry
114114
; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
115115
; CHECK-V-NEXT: vfcvt.rtz.x.f.v v8, v8
116-
; CHECK-V-NEXT: vmax.vx v8, v8, zero
117116
; CHECK-V-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
118117
; CHECK-V-NEXT: vnclipu.wi v8, v8, 0
119118
; CHECK-V-NEXT: ret
@@ -304,9 +303,6 @@ define <4 x i32> @ustest_f32i32(<4 x float> %x) {
304303
; CHECK-V: # %bb.0: # %entry
305304
; CHECK-V-NEXT: vsetivli zero, 4, e32, m1, ta, ma
306305
; CHECK-V-NEXT: vfwcvt.rtz.x.f.v v10, v8
307-
; CHECK-V-NEXT: vsetvli zero, zero, e64, m2, ta, ma
308-
; CHECK-V-NEXT: vmax.vx v10, v10, zero
309-
; CHECK-V-NEXT: vsetvli zero, zero, e32, m1, ta, ma
310306
; CHECK-V-NEXT: vnclipu.wi v8, v10, 0
311307
; CHECK-V-NEXT: ret
312308
entry:
@@ -801,17 +797,16 @@ define <4 x i32> @ustest_f16i32(<4 x half> %x) {
801797
; CHECK-V-NEXT: call __extendhfsf2
802798
; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz
803799
; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
804-
; CHECK-V-NEXT: vmv.s.x v8, a0
800+
; CHECK-V-NEXT: vmv.s.x v10, a0
805801
; CHECK-V-NEXT: addi a0, sp, 16
806-
; CHECK-V-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload
807-
; CHECK-V-NEXT: vslideup.vi v8, v9, 1
802+
; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
803+
; CHECK-V-NEXT: vslideup.vi v10, v8, 1
808804
; CHECK-V-NEXT: csrr a0, vlenb
809805
; CHECK-V-NEXT: add a0, sp, a0
810806
; CHECK-V-NEXT: addi a0, a0, 16
811-
; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
807+
; CHECK-V-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload
812808
; CHECK-V-NEXT: vsetivli zero, 4, e64, m2, ta, ma
813-
; CHECK-V-NEXT: vslideup.vi v8, v10, 2
814-
; CHECK-V-NEXT: vmax.vx v10, v8, zero
809+
; CHECK-V-NEXT: vslideup.vi v10, v8, 2
815810
; CHECK-V-NEXT: vsetvli zero, zero, e32, m1, ta, ma
816811
; CHECK-V-NEXT: vnclipu.wi v8, v10, 0
817812
; CHECK-V-NEXT: csrr a0, vlenb
@@ -944,9 +939,8 @@ define <2 x i16> @ustest_f64i16(<2 x double> %x) {
944939
; CHECK-V: # %bb.0: # %entry
945940
; CHECK-V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
946941
; CHECK-V-NEXT: vfncvt.rtz.x.f.w v9, v8
947-
; CHECK-V-NEXT: vmax.vx v8, v9, zero
948942
; CHECK-V-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
949-
; CHECK-V-NEXT: vnclipu.wi v8, v8, 0
943+
; CHECK-V-NEXT: vnclipu.wi v8, v9, 0
950944
; CHECK-V-NEXT: ret
951945
entry:
952946
%conv = fptosi <2 x double> %x to <2 x i32>
@@ -1139,7 +1133,6 @@ define <4 x i16> @ustest_f32i16(<4 x float> %x) {
11391133
; CHECK-V: # %bb.0: # %entry
11401134
; CHECK-V-NEXT: vsetivli zero, 4, e32, m1, ta, ma
11411135
; CHECK-V-NEXT: vfcvt.rtz.x.f.v v8, v8
1142-
; CHECK-V-NEXT: vmax.vx v8, v8, zero
11431136
; CHECK-V-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
11441137
; CHECK-V-NEXT: vnclipu.wi v8, v8, 0
11451138
; CHECK-V-NEXT: ret
@@ -2114,24 +2107,23 @@ define <8 x i16> @ustest_f16i16(<8 x half> %x) {
21142107
; CHECK-V-NEXT: call __extendhfsf2
21152108
; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz
21162109
; CHECK-V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
2117-
; CHECK-V-NEXT: vmv.s.x v8, a0
2110+
; CHECK-V-NEXT: vmv.s.x v10, a0
21182111
; CHECK-V-NEXT: addi a0, sp, 16
2119-
; CHECK-V-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload
2120-
; CHECK-V-NEXT: vslideup.vi v8, v9, 1
2112+
; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
2113+
; CHECK-V-NEXT: vslideup.vi v10, v8, 1
21212114
; CHECK-V-NEXT: csrr a0, vlenb
21222115
; CHECK-V-NEXT: add a0, sp, a0
21232116
; CHECK-V-NEXT: addi a0, a0, 16
2124-
; CHECK-V-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload
2117+
; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
21252118
; CHECK-V-NEXT: vsetivli zero, 4, e32, m1, ta, ma
2126-
; CHECK-V-NEXT: vslideup.vi v8, v9, 2
2119+
; CHECK-V-NEXT: vslideup.vi v10, v8, 2
21272120
; CHECK-V-NEXT: csrr a0, vlenb
21282121
; CHECK-V-NEXT: slli a0, a0, 1
21292122
; CHECK-V-NEXT: add a0, sp, a0
21302123
; CHECK-V-NEXT: addi a0, a0, 16
2131-
; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
2124+
; CHECK-V-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload
21322125
; CHECK-V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
2133-
; CHECK-V-NEXT: vslideup.vi v8, v10, 4
2134-
; CHECK-V-NEXT: vmax.vx v10, v8, zero
2126+
; CHECK-V-NEXT: vslideup.vi v10, v8, 4
21352127
; CHECK-V-NEXT: vsetvli zero, zero, e16, m1, ta, ma
21362128
; CHECK-V-NEXT: vnclipu.wi v8, v10, 0
21372129
; CHECK-V-NEXT: csrr a0, vlenb
@@ -3473,7 +3465,6 @@ define <2 x i32> @ustest_f64i32_mm(<2 x double> %x) {
34733465
; CHECK-V: # %bb.0: # %entry
34743466
; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
34753467
; CHECK-V-NEXT: vfcvt.rtz.x.f.v v8, v8
3476-
; CHECK-V-NEXT: vmax.vx v8, v8, zero
34773468
; CHECK-V-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
34783469
; CHECK-V-NEXT: vnclipu.wi v8, v8, 0
34793470
; CHECK-V-NEXT: ret
@@ -3659,9 +3650,6 @@ define <4 x i32> @ustest_f32i32_mm(<4 x float> %x) {
36593650
; CHECK-V: # %bb.0: # %entry
36603651
; CHECK-V-NEXT: vsetivli zero, 4, e32, m1, ta, ma
36613652
; CHECK-V-NEXT: vfwcvt.rtz.x.f.v v10, v8
3662-
; CHECK-V-NEXT: vsetvli zero, zero, e64, m2, ta, ma
3663-
; CHECK-V-NEXT: vmax.vx v10, v10, zero
3664-
; CHECK-V-NEXT: vsetvli zero, zero, e32, m1, ta, ma
36653653
; CHECK-V-NEXT: vnclipu.wi v8, v10, 0
36663654
; CHECK-V-NEXT: ret
36673655
entry:
@@ -4151,17 +4139,16 @@ define <4 x i32> @ustest_f16i32_mm(<4 x half> %x) {
41514139
; CHECK-V-NEXT: call __extendhfsf2
41524140
; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz
41534141
; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
4154-
; CHECK-V-NEXT: vmv.s.x v8, a0
4142+
; CHECK-V-NEXT: vmv.s.x v10, a0
41554143
; CHECK-V-NEXT: addi a0, sp, 16
4156-
; CHECK-V-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload
4157-
; CHECK-V-NEXT: vslideup.vi v8, v9, 1
4144+
; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
4145+
; CHECK-V-NEXT: vslideup.vi v10, v8, 1
41584146
; CHECK-V-NEXT: csrr a0, vlenb
41594147
; CHECK-V-NEXT: add a0, sp, a0
41604148
; CHECK-V-NEXT: addi a0, a0, 16
4161-
; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
4149+
; CHECK-V-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload
41624150
; CHECK-V-NEXT: vsetivli zero, 4, e64, m2, ta, ma
4163-
; CHECK-V-NEXT: vslideup.vi v8, v10, 2
4164-
; CHECK-V-NEXT: vmax.vx v10, v8, zero
4151+
; CHECK-V-NEXT: vslideup.vi v10, v8, 2
41654152
; CHECK-V-NEXT: vsetvli zero, zero, e32, m1, ta, ma
41664153
; CHECK-V-NEXT: vnclipu.wi v8, v10, 0
41674154
; CHECK-V-NEXT: csrr a0, vlenb
@@ -4289,9 +4276,8 @@ define <2 x i16> @ustest_f64i16_mm(<2 x double> %x) {
42894276
; CHECK-V: # %bb.0: # %entry
42904277
; CHECK-V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
42914278
; CHECK-V-NEXT: vfncvt.rtz.x.f.w v9, v8
4292-
; CHECK-V-NEXT: vmax.vx v8, v9, zero
42934279
; CHECK-V-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
4294-
; CHECK-V-NEXT: vnclipu.wi v8, v8, 0
4280+
; CHECK-V-NEXT: vnclipu.wi v8, v9, 0
42954281
; CHECK-V-NEXT: ret
42964282
entry:
42974283
%conv = fptosi <2 x double> %x to <2 x i32>
@@ -4479,7 +4465,6 @@ define <4 x i16> @ustest_f32i16_mm(<4 x float> %x) {
44794465
; CHECK-V: # %bb.0: # %entry
44804466
; CHECK-V-NEXT: vsetivli zero, 4, e32, m1, ta, ma
44814467
; CHECK-V-NEXT: vfcvt.rtz.x.f.v v8, v8
4482-
; CHECK-V-NEXT: vmax.vx v8, v8, zero
44834468
; CHECK-V-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
44844469
; CHECK-V-NEXT: vnclipu.wi v8, v8, 0
44854470
; CHECK-V-NEXT: ret
@@ -5449,24 +5434,23 @@ define <8 x i16> @ustest_f16i16_mm(<8 x half> %x) {
54495434
; CHECK-V-NEXT: call __extendhfsf2
54505435
; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz
54515436
; CHECK-V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
5452-
; CHECK-V-NEXT: vmv.s.x v8, a0
5437+
; CHECK-V-NEXT: vmv.s.x v10, a0
54535438
; CHECK-V-NEXT: addi a0, sp, 16
5454-
; CHECK-V-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload
5455-
; CHECK-V-NEXT: vslideup.vi v8, v9, 1
5439+
; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
5440+
; CHECK-V-NEXT: vslideup.vi v10, v8, 1
54565441
; CHECK-V-NEXT: csrr a0, vlenb
54575442
; CHECK-V-NEXT: add a0, sp, a0
54585443
; CHECK-V-NEXT: addi a0, a0, 16
5459-
; CHECK-V-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload
5444+
; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
54605445
; CHECK-V-NEXT: vsetivli zero, 4, e32, m1, ta, ma
5461-
; CHECK-V-NEXT: vslideup.vi v8, v9, 2
5446+
; CHECK-V-NEXT: vslideup.vi v10, v8, 2
54625447
; CHECK-V-NEXT: csrr a0, vlenb
54635448
; CHECK-V-NEXT: slli a0, a0, 1
54645449
; CHECK-V-NEXT: add a0, sp, a0
54655450
; CHECK-V-NEXT: addi a0, a0, 16
5466-
; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
5451+
; CHECK-V-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload
54675452
; CHECK-V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
5468-
; CHECK-V-NEXT: vslideup.vi v8, v10, 4
5469-
; CHECK-V-NEXT: vmax.vx v10, v8, zero
5453+
; CHECK-V-NEXT: vslideup.vi v10, v8, 4
54705454
; CHECK-V-NEXT: vsetvli zero, zero, e16, m1, ta, ma
54715455
; CHECK-V-NEXT: vnclipu.wi v8, v10, 0
54725456
; CHECK-V-NEXT: csrr a0, vlenb

0 commit comments

Comments
 (0)