You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
This patch adds RVV codegen support for vp.fptrunc. The lowerings of fp_round and vp.fptrunc share most of their code, so a common lowering function is used to handle both, similar to vp.trunc.
Reviewed By: craig.topper
Differential Revision: https://reviews.llvm.org/D123841
declare <2 x half> @llvm.vp.fptrunc.v2f16.v2f32(<2 x float>, <2 x i1>, i32)
6
+
7
+
define <2 x half> @vfptrunc_v2f16_v2f32(<2 x float> %a, <2 x i1> %m, i32 zeroext %vl) {
8
+
; CHECK-LABEL: vfptrunc_v2f16_v2f32:
9
+
; CHECK: # %bb.0:
10
+
; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
11
+
; CHECK-NEXT: vfncvt.f.f.w v9, v8, v0.t
12
+
; CHECK-NEXT: vmv1r.v v8, v9
13
+
; CHECK-NEXT: ret
14
+
%v = call <2 x half> @llvm.vp.fptrunc.v2f16.v2f32(<2 x float> %a, <2 x i1> %m, i32 %vl)
15
+
ret <2 x half> %v
16
+
}
17
+
18
+
define <2 x half> @vfptrunc_v2f16_v2f32_unmasked(<2 x float> %a, i32 zeroext %vl) {
19
+
; CHECK-LABEL: vfptrunc_v2f16_v2f32_unmasked:
20
+
; CHECK: # %bb.0:
21
+
; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
22
+
; CHECK-NEXT: vfncvt.f.f.w v9, v8
23
+
; CHECK-NEXT: vmv1r.v v8, v9
24
+
; CHECK-NEXT: ret
25
+
%v = call <2 x half> @llvm.vp.fptrunc.v2f16.v2f32(<2 x float> %a, <2 x i1> shufflevector (<2 x i1> insertelement (<2 x i1> undef, i1 true, i32 0), <2 x i1> undef, <2 x i32> zeroinitializer), i32 %vl)
26
+
ret <2 x half> %v
27
+
}
28
+
29
+
declare <2 x half> @llvm.vp.fptrunc.v2f16.v2f64(<2 x double>, <2 x i1>, i32)
30
+
31
+
define <2 x half> @vfptrunc_v2f16_v2f64(<2 x double> %a, <2 x i1> %m, i32 zeroext %vl) {
32
+
; CHECK-LABEL: vfptrunc_v2f16_v2f64:
33
+
; CHECK: # %bb.0:
34
+
; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
35
+
; CHECK-NEXT: vfncvt.rod.f.f.w v9, v8, v0.t
36
+
; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
37
+
; CHECK-NEXT: vfncvt.f.f.w v8, v9, v0.t
38
+
; CHECK-NEXT: ret
39
+
%v = call <2 x half> @llvm.vp.fptrunc.v2f16.v2f64(<2 x double> %a, <2 x i1> %m, i32 %vl)
40
+
ret <2 x half> %v
41
+
}
42
+
43
+
define <2 x half> @vfptrunc_v2f16_v2f64_unmasked(<2 x double> %a, i32 zeroext %vl) {
44
+
; CHECK-LABEL: vfptrunc_v2f16_v2f64_unmasked:
45
+
; CHECK: # %bb.0:
46
+
; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
47
+
; CHECK-NEXT: vfncvt.rod.f.f.w v9, v8
48
+
; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
49
+
; CHECK-NEXT: vfncvt.f.f.w v8, v9
50
+
; CHECK-NEXT: ret
51
+
%v = call <2 x half> @llvm.vp.fptrunc.v2f16.v2f64(<2 x double> %a, <2 x i1> shufflevector (<2 x i1> insertelement (<2 x i1> undef, i1 true, i32 0), <2 x i1> undef, <2 x i32> zeroinitializer), i32 %vl)
52
+
ret <2 x half> %v
53
+
}
54
+
55
+
declare <2 x float> @llvm.vp.fptrunc.v2f64.v2f32(<2 x double>, <2 x i1>, i32)
56
+
57
+
define <2 x float> @vfptrunc_v2f32_v2f64(<2 x double> %a, <2 x i1> %m, i32 zeroext %vl) {
58
+
; CHECK-LABEL: vfptrunc_v2f32_v2f64:
59
+
; CHECK: # %bb.0:
60
+
; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
61
+
; CHECK-NEXT: vfncvt.f.f.w v9, v8, v0.t
62
+
; CHECK-NEXT: vmv1r.v v8, v9
63
+
; CHECK-NEXT: ret
64
+
%v = call <2 x float> @llvm.vp.fptrunc.v2f64.v2f32(<2 x double> %a, <2 x i1> %m, i32 %vl)
65
+
ret <2 x float> %v
66
+
}
67
+
68
+
define <2 x float> @vfptrunc_v2f32_v2f64_unmasked(<2 x double> %a, i32 zeroext %vl) {
69
+
; CHECK-LABEL: vfptrunc_v2f32_v2f64_unmasked:
70
+
; CHECK: # %bb.0:
71
+
; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
72
+
; CHECK-NEXT: vfncvt.f.f.w v9, v8
73
+
; CHECK-NEXT: vmv1r.v v8, v9
74
+
; CHECK-NEXT: ret
75
+
%v = call <2 x float> @llvm.vp.fptrunc.v2f64.v2f32(<2 x double> %a, <2 x i1> shufflevector (<2 x i1> insertelement (<2 x i1> undef, i1 true, i32 0), <2 x i1> undef, <2 x i32> zeroinitializer), i32 %vl)
declare <vscale x 2 x half> @llvm.vp.fptrunc.nxv2f16.nxv2f32(<vscale x 2 x float>, <vscale x 2 x i1>, i32)
6
+
7
+
define <vscale x 2 x half> @vfptrunc_nxv2f16_nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x i1> %m, i32 zeroext %vl) {
8
+
; CHECK-LABEL: vfptrunc_nxv2f16_nxv2f32:
9
+
; CHECK: # %bb.0:
10
+
; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
11
+
; CHECK-NEXT: vfncvt.f.f.w v9, v8, v0.t
12
+
; CHECK-NEXT: vmv1r.v v8, v9
13
+
; CHECK-NEXT: ret
14
+
%v = call <vscale x 2 x half> @llvm.vp.fptrunc.nxv2f16.nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x i1> %m, i32 %vl)
15
+
ret <vscale x 2 x half> %v
16
+
}
17
+
18
+
define <vscale x 2 x half> @vfptrunc_nxv2f16_nxv2f32_unmasked(<vscale x 2 x float> %a, i32 zeroext %vl) {
19
+
; CHECK-LABEL: vfptrunc_nxv2f16_nxv2f32_unmasked:
20
+
; CHECK: # %bb.0:
21
+
; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
22
+
; CHECK-NEXT: vfncvt.f.f.w v9, v8
23
+
; CHECK-NEXT: vmv1r.v v8, v9
24
+
; CHECK-NEXT: ret
25
+
%v = call <vscale x 2 x half> @llvm.vp.fptrunc.nxv2f16.nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> undef, i1 true, i32 0), <vscale x 2 x i1> undef, <vscale x 2 x i32> zeroinitializer), i32 %vl)
26
+
ret <vscale x 2 x half> %v
27
+
}
28
+
29
+
declare <vscale x 2 x half> @llvm.vp.fptrunc.nxv2f16.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i1>, i32)
30
+
31
+
define <vscale x 2 x half> @vfptrunc_nxv2f16_nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x i1> %m, i32 zeroext %vl) {
32
+
; CHECK-LABEL: vfptrunc_nxv2f16_nxv2f64:
33
+
; CHECK: # %bb.0:
34
+
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
35
+
; CHECK-NEXT: vfncvt.rod.f.f.w v10, v8, v0.t
36
+
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu
37
+
; CHECK-NEXT: vfncvt.f.f.w v8, v10, v0.t
38
+
; CHECK-NEXT: ret
39
+
%v = call <vscale x 2 x half> @llvm.vp.fptrunc.nxv2f16.nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x i1> %m, i32 %vl)
40
+
ret <vscale x 2 x half> %v
41
+
}
42
+
43
+
define <vscale x 2 x half> @vfptrunc_nxv2f16_nxv2f64_unmasked(<vscale x 2 x double> %a, i32 zeroext %vl) {
44
+
; CHECK-LABEL: vfptrunc_nxv2f16_nxv2f64_unmasked:
45
+
; CHECK: # %bb.0:
46
+
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
47
+
; CHECK-NEXT: vfncvt.rod.f.f.w v10, v8
48
+
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu
49
+
; CHECK-NEXT: vfncvt.f.f.w v8, v10
50
+
; CHECK-NEXT: ret
51
+
%v = call <vscale x 2 x half> @llvm.vp.fptrunc.nxv2f16.nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> undef, i1 true, i32 0), <vscale x 2 x i1> undef, <vscale x 2 x i32> zeroinitializer), i32 %vl)
52
+
ret <vscale x 2 x half> %v
53
+
}
54
+
55
+
declare <vscale x 2 x float> @llvm.vp.fptrunc.nxv2f64.nxv2f32(<vscale x 2 x double>, <vscale x 2 x i1>, i32)
56
+
57
+
define <vscale x 2 x float> @vfptrunc_nxv2f32_nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x i1> %m, i32 zeroext %vl) {
58
+
; CHECK-LABEL: vfptrunc_nxv2f32_nxv2f64:
59
+
; CHECK: # %bb.0:
60
+
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
61
+
; CHECK-NEXT: vfncvt.f.f.w v10, v8, v0.t
62
+
; CHECK-NEXT: vmv.v.v v8, v10
63
+
; CHECK-NEXT: ret
64
+
%v = call <vscale x 2 x float> @llvm.vp.fptrunc.nxv2f64.nxv2f32(<vscale x 2 x double> %a, <vscale x 2 x i1> %m, i32 %vl)
65
+
ret <vscale x 2 x float> %v
66
+
}
67
+
68
+
define <vscale x 2 x float> @vfptrunc_nxv2f32_nxv2f64_unmasked(<vscale x 2 x double> %a, i32 zeroext %vl) {
69
+
; CHECK-LABEL: vfptrunc_nxv2f32_nxv2f64_unmasked:
70
+
; CHECK: # %bb.0:
71
+
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
72
+
; CHECK-NEXT: vfncvt.f.f.w v10, v8
73
+
; CHECK-NEXT: vmv.v.v v8, v10
74
+
; CHECK-NEXT: ret
75
+
%v = call <vscale x 2 x float> @llvm.vp.fptrunc.nxv2f64.nxv2f32(<vscale x 2 x double> %a, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> undef, i1 true, i32 0), <vscale x 2 x i1> undef, <vscale x 2 x i32> zeroinitializer), i32 %vl)
0 commit comments