@@ -13,15 +13,23 @@ declare <2 x float> @llvm.dx.normalize.v2f32(<2 x float>)
13
13
; Declarations of the 3- and 4-element float overloads of the
; llvm.dx.normalize intrinsic; test_normalize_float3 and
; test_normalize_float4 further down call them.
declare <3 x float > @llvm.dx.normalize.v3f32 (<3 x float >)
14
14
declare <4 x float > @llvm.dx.normalize.v4f32 (<4 x float >)
15
15
16
; Scalar half case: calls @llvm.dx.normalize.f16 on %p0 and returns the result.
; The single check line expects the call to have been replaced by a division
; of %p0 by itself.
; NOTE(review): x / x evaluates to +1.0 for negative x as well; confirm that
; dropping the sign is the intended scalar result.
+ define noundef half @test_normalize_half (half noundef %p0 ) {
17
+ entry:
18
+ ; CHECK: fdiv half %p0, %p0
19
+ %hlsl.normalize = call half @llvm.dx.normalize.f16 (half %p0 )
20
+ ret half %hlsl.normalize
21
+ }
22
+
16
23
define noundef <2 x half > @test_normalize_half2 (<2 x half > noundef %p0 ) {
17
24
entry:
18
25
; CHECK: extractelement <2 x half> %{{.*}}, i64 0
19
- ; CHECK: fmul half %{{.*}}, %{{.*}}
20
- ; CHECK: extractelement <2 x half> %{{.*}}, i64 1
21
- ; CHECK: fmul half %{{.*}}, %{{.*}}
22
- ; CHECK: fadd half %{{.*}}, %{{.*}}
23
- ; EXPCHECK: call half @llvm.sqrt.f16(half %{{.*}})
24
- ; DOPCHECK: call half @dx.op.unary.f16(i32 24, half %{{.*}})
26
+ ; EXPCHECK: call half @llvm.dx.dot2.v2f16(<2 x half> %{{.*}}, <2 x half> %{{.*}})
27
+ ; DOPCHECK: call half @dx.op.dot2.f16(i32 54, half %{{.*}}, half %{{.*}}, half %{{.*}}, half %{{.*}})
28
+ ; EXPCHECK: call half @llvm.dx.rsqrt.f16(half %{{.*}})
29
+ ; DOPCHECK: call half @dx.op.unary.f16(i32 25, half %{{.*}})
30
+ ; CHECK: insertelement <2 x half> poison, half %{{.*}}, i64 0
31
+ ; CHECK: shufflevector <2 x half> %{{.*}}, <2 x half> poison, <2 x i32> zeroinitializer
32
+ ; CHECK: fmul <2 x half> %{{.*}}, %{{.*}}
25
33
26
34
%hlsl.normalize = call <2 x half > @llvm.dx.normalize.v2f16 (<2 x half > %p0 )
27
35
ret <2 x half > %hlsl.normalize
@@ -30,15 +38,13 @@ entry:
30
38
define noundef <3 x half > @test_normalize_half3 (<3 x half > noundef %p0 ) {
31
39
entry:
32
40
; CHECK: extractelement <3 x half> %{{.*}}, i64 0
33
- ; CHECK: fmul half %{{.*}}, %{{.*}}
34
- ; CHECK: extractelement <3 x half> %{{.*}}, i64 1
35
- ; CHECK: fmul half %{{.*}}, %{{.*}}
36
- ; CHECK: fadd half %{{.*}}, %{{.*}}
37
- ; CHECK: extractelement <3 x half> %{{.*}}, i64 2
38
- ; CHECK: fmul half %{{.*}}, %{{.*}}
39
- ; CHECK: fadd half %{{.*}}, %{{.*}}
40
- ; EXPCHECK: call half @llvm.sqrt.f16(half %{{.*}})
41
- ; DOPCHECK: call half @dx.op.unary.f16(i32 24, half %{{.*}})
41
+ ; EXPCHECK: call half @llvm.dx.dot3.v3f16(<3 x half> %{{.*}}, <3 x half> %{{.*}})
42
+ ; DOPCHECK: call half @dx.op.dot3.f16(i32 55, half %{{.*}}, half %{{.*}}, half %{{.*}}, half %{{.*}})
43
+ ; EXPCHECK: call half @llvm.dx.rsqrt.f16(half %{{.*}})
44
+ ; DOPCHECK: call half @dx.op.unary.f16(i32 25, half %{{.*}})
45
+ ; CHECK: insertelement <3 x half> poison, half %{{.*}}, i64 0
46
+ ; CHECK: shufflevector <3 x half> %{{.*}}, <3 x half> poison, <3 x i32> zeroinitializer
47
+ ; CHECK: fmul <3 x half> %{{.*}}, %{{.*}}
42
48
43
49
%hlsl.normalize = call <3 x half > @llvm.dx.normalize.v3f16 (<3 x half > %p0 )
44
50
ret <3 x half > %hlsl.normalize
@@ -47,32 +53,35 @@ entry:
47
53
; <4 x half> case: calls @llvm.dx.normalize.v4f16 on %p0 and returns the result.
; The check lines expect, in order: a dot4 call, a reciprocal-square-root call
; on a scalar half, a splat of a scalar across <4 x half> (insertelement at
; lane 0 followed by a zeroinitializer-mask shufflevector), and a vector fmul.
; Lines with the EXPCHECK prefix match the intrinsic form (llvm.dx.dot4 /
; llvm.dx.rsqrt); lines with the DOPCHECK prefix match the dx.op form
; (opcode 56 for dot4, dx.op.unary with opcode 25). Which RUN line enables
; each prefix is not visible in this part of the file.
define noundef <4 x half > @test_normalize_half4 (<4 x half > noundef %p0 ) {
48
54
entry:
49
55
; CHECK: extractelement <4 x half> %{{.*}}, i64 0
50
- ; CHECK: fmul half %{{.*}}, %{{.*}}
51
- ; CHECK: extractelement <4 x half> %{{.*}}, i64 1
52
- ; CHECK: fmul half %{{.*}}, %{{.*}}
53
- ; CHECK: fadd half %{{.*}}, %{{.*}}
54
- ; CHECK: extractelement <4 x half> %{{.*}}, i64 2
55
- ; CHECK: fmul half %{{.*}}, %{{.*}}
56
- ; CHECK: fadd half %{{.*}}, %{{.*}}
57
- ; CHECK: extractelement <4 x half> %{{.*}}, i64 3
58
- ; CHECK: fmul half %{{.*}}, %{{.*}}
59
- ; CHECK: fadd half %{{.*}}, %{{.*}}
60
- ; EXPCHECK: call half @llvm.sqrt.f16(half %{{.*}})
61
- ; DOPCHECK: call half @dx.op.unary.f16(i32 24, half %{{.*}})
56
+ ; EXPCHECK: call half @llvm.dx.dot4.v4f16(<4 x half> %{{.*}}, <4 x half> %{{.*}})
57
; dx.op.dot4 receives both 4-component vectors scalarized, i.e. eight half
; operands after the opcode. All eight are spelled out so the pattern states
; the real arity instead of relying on a greedy wildcard to absorb the rest.
+ ; DOPCHECK: call half @dx.op.dot4.f16(i32 56, half %{{.*}}, half %{{.*}}, half %{{.*}}, half %{{.*}}, half %{{.*}}, half %{{.*}}, half %{{.*}}, half %{{.*}})
58
+ ; EXPCHECK: call half @llvm.dx.rsqrt.f16(half %{{.*}})
59
+ ; DOPCHECK: call half @dx.op.unary.f16(i32 25, half %{{.*}})
60
+ ; CHECK: insertelement <4 x half> poison, half %{{.*}}, i64 0
61
+ ; CHECK: shufflevector <4 x half> %{{.*}}, <4 x half> poison, <4 x i32> zeroinitializer
62
+ ; CHECK: fmul <4 x half> %{{.*}}, %{{.*}}
62
63
63
64
%hlsl.normalize = call <4 x half > @llvm.dx.normalize.v4f16 (<4 x half > %p0 )
64
65
ret <4 x half > %hlsl.normalize
65
66
}
66
67
68
; Scalar float case: calls @llvm.dx.normalize.f32 on %p0 and returns the result.
; The single check line expects the call to have been replaced by a division
; of %p0 by itself.
; NOTE(review): x / x evaluates to +1.0 for negative x as well; confirm that
; dropping the sign is the intended scalar result.
+ define noundef float @test_normalize_float (float noundef %p0 ) {
69
+ entry:
70
+ ; CHECK: fdiv float %p0, %p0
71
+ %hlsl.normalize = call float @llvm.dx.normalize.f32 (float %p0 )
72
+ ret float %hlsl.normalize
73
+ }
74
+
67
75
define noundef <2 x float > @test_normalize_float2 (<2 x float > noundef %p0 ) {
68
76
entry:
69
77
; CHECK: extractelement <2 x float> %{{.*}}, i64 0
70
- ; CHECK: fmul float %{{.*}}, %{{.*}}
71
- ; CHECK: extractelement <2 x float> %{{.*}}, i64 1
72
- ; CHECK: fmul float %{{.*}}, %{{.*}}
73
- ; CHECK: fadd float %{{.*}}, %{{.*}}
74
- ; EXPCHECK: call float @llvm.sqrt.f32(float %{{.*}})
75
- ; DOPCHECK: call float @dx.op.unary.f32(i32 24, float %{{.*}})
78
+ ; EXPCHECK: call float @llvm.dx.dot2.v2f32(<2 x float> %{{.*}}, <2 x float> %{{.*}})
79
+ ; DOPCHECK: call float @dx.op.dot2.f32(i32 54, float %{{.*}}, float %{{.*}}, float %{{.*}}, float %{{.*}})
80
+ ; EXPCHECK: call float @llvm.dx.rsqrt.f32(float %{{.*}})
81
+ ; DOPCHECK: call float @dx.op.unary.f32(i32 25, float %{{.*}})
82
+ ; CHECK: insertelement <2 x float> poison, float %{{.*}}, i64 0
83
+ ; CHECK: shufflevector <2 x float> %{{.*}}, <2 x float> poison, <2 x i32> zeroinitializer
84
+ ; CHECK: fmul <2 x float> %{{.*}}, %{{.*}}
76
85
77
86
%hlsl.normalize = call <2 x float > @llvm.dx.normalize.v2f32 (<2 x float > %p0 )
78
87
ret <2 x float > %hlsl.normalize
@@ -81,15 +90,13 @@ entry:
81
90
define noundef <3 x float > @test_normalize_float3 (<3 x float > noundef %p0 ) {
82
91
entry:
83
92
; CHECK: extractelement <3 x float> %{{.*}}, i64 0
84
- ; CHECK: fmul float %{{.*}}, %{{.*}}
85
- ; CHECK: extractelement <3 x float> %{{.*}}, i64 1
86
- ; CHECK: fmul float %{{.*}}, %{{.*}}
87
- ; CHECK: fadd float %{{.*}}, %{{.*}}
88
- ; CHECK: extractelement <3 x float> %{{.*}}, i64 2
89
- ; CHECK: fmul float %{{.*}}, %{{.*}}
90
- ; CHECK: fadd float %{{.*}}, %{{.*}}
91
- ; EXPCHECK: call float @llvm.sqrt.f32(float %{{.*}})
92
- ; DOPCHECK: call float @dx.op.unary.f32(i32 24, float %{{.*}})
93
+ ; EXPCHECK: call float @llvm.dx.dot3.v3f32(<3 x float> %{{.*}}, <3 x float> %{{.*}})
94
+ ; DOPCHECK: call float @dx.op.dot3.f32(i32 55, float %{{.*}}, float %{{.*}}, float %{{.*}}, float %{{.*}})
95
+ ; EXPCHECK: call float @llvm.dx.rsqrt.f32(float %{{.*}})
96
+ ; DOPCHECK: call float @dx.op.unary.f32(i32 25, float %{{.*}})
97
+ ; CHECK: insertelement <3 x float> poison, float %{{.*}}, i64 0
98
+ ; CHECK: shufflevector <3 x float> %{{.*}}, <3 x float> poison, <3 x i32> zeroinitializer
99
+ ; CHECK: fmul <3 x float> %{{.*}}, %{{.*}}
93
100
94
101
%hlsl.normalize = call <3 x float > @llvm.dx.normalize.v3f32 (<3 x float > %p0 )
95
102
ret <3 x float > %hlsl.normalize
@@ -98,18 +105,13 @@ entry:
98
105
define noundef <4 x float > @test_normalize_float4 (<4 x float > noundef %p0 ) {
99
106
entry:
100
107
; CHECK: extractelement <4 x float> %{{.*}}, i64 0
101
- ; CHECK: fmul float %{{.*}}, %{{.*}}
102
- ; CHECK: extractelement <4 x float> %{{.*}}, i64 1
103
- ; CHECK: fmul float %{{.*}}, %{{.*}}
104
- ; CHECK: fadd float %{{.*}}, %{{.*}}
105
- ; CHECK: extractelement <4 x float> %{{.*}}, i64 2
106
- ; CHECK: fmul float %{{.*}}, %{{.*}}
107
- ; CHECK: fadd float %{{.*}}, %{{.*}}
108
- ; CHECK: extractelement <4 x float> %{{.*}}, i64 3
109
- ; CHECK: fmul float %{{.*}}, %{{.*}}
110
- ; CHECK: fadd float %{{.*}}, %{{.*}}
111
- ; EXPCHECK: call float @llvm.sqrt.f32(float %{{.*}})
112
- ; DOPCHECK: call float @dx.op.unary.f32(i32 24, float %{{.*}})
108
+ ; EXPCHECK: call float @llvm.dx.dot4.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}})
109
+ ; DOPCHECK: call float @dx.op.dot4.f32(i32 56, float %{{.*}}, float %{{.*}}, float %{{.*}}, float %{{.*}})
110
+ ; EXPCHECK: call float @llvm.dx.rsqrt.f32(float %{{.*}})
111
+ ; DOPCHECK: call float @dx.op.unary.f32(i32 25, float %{{.*}})
112
+ ; CHECK: insertelement <4 x float> poison, float %{{.*}}, i64 0
113
+ ; CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> poison, <4 x i32> zeroinitializer
114
+ ; CHECK: fmul <4 x float> %{{.*}}, %{{.*}}
113
115
114
116
%hlsl.normalize = call <4 x float > @llvm.dx.normalize.v4f32 (<4 x float > %p0 )
115
117
ret <4 x float > %hlsl.normalize
0 commit comments