4
4
; The result has the fewest vector elements between the result and the two operands so the negation can be moved there
5
5
define <2 x double > @test_negation_move_to_result (<6 x double > %a , <3 x double > %b ) {
6
6
; CHECK-LABEL: @test_negation_move_to_result(
7
- ; CHECK-NEXT: [[A_NEG :%.*]] = fneg < 6 x double> [[A:%.*]]
8
- ; CHECK-NEXT: [[RES :%.*]] = tail call <2 x double> @llvm.matrix.multiply.v2f64.v6f64.v3f64(<6 x double> [[A_NEG]], <3 x double> [[B:%.*]], i32 2, i32 3, i32 1)
9
- ; CHECK-NEXT: ret <2 x double> [[RES ]]
7
+ ; CHECK-NEXT: [[TMP1 :%.*]] = call <2 x double> @llvm.matrix.multiply.v2f64.v6f64.v3f64(< 6 x double> [[A:%.*]], <3 x double> [[B:%.*]], i32 2, i32 3, i32 1)
8
+ ; CHECK-NEXT: [[TMP2 :%.*]] = fneg <2 x double> [[TMP1]]
9
+ ; CHECK-NEXT: ret <2 x double> [[TMP2 ]]
10
10
;
11
11
%a.neg = fneg <6 x double > %a
12
12
%res = tail call <2 x double > @llvm.matrix.multiply.v2f64.v6f64.v3f64 (<6 x double > %a.neg , <3 x double > %b , i32 2 , i32 3 , i32 1 )
@@ -17,20 +17,53 @@ define <2 x double> @test_negation_move_to_result(<6 x double> %a, <3 x double>
17
17
; Fast flag should be preserved
18
18
; Input: an fneg of the 6-element operand %a feeds a `fast` matrix multiply.
; The updated expectations (the `+` CHECK lines) are a `call fast` multiply on the
; un-negated operand followed by an `fneg fast` of the 2-element result.
define <2 x double > @test_negation_move_to_result_with_fastflags (<6 x double > %a , <3 x double > %b ) {
19
19
; CHECK-LABEL: @test_negation_move_to_result_with_fastflags(
20
- ; CHECK-NEXT: [[A_NEG :%.*]] = fneg < 6 x double> [[A:%.*]]
21
- ; CHECK-NEXT: [[RES :%.*]] = tail call fast <2 x double> @llvm.matrix.multiply.v2f64.v6f64.v3f64(<6 x double> [[A_NEG]], <3 x double> [[B:%.*]], i32 2, i32 3, i32 1)
22
- ; CHECK-NEXT: ret <2 x double> [[RES ]]
20
+ ; CHECK-NEXT: [[TMP1 :%.*]] = call fast <2 x double> @llvm.matrix.multiply.v2f64.v6f64.v3f64(< 6 x double> [[A:%.*]], <3 x double> [[B:%.*]], i32 2, i32 3, i32 1)
21
+ ; CHECK-NEXT: [[TMP2 :%.*]] = fneg fast <2 x double> [[TMP1]]
22
+ ; CHECK-NEXT: ret <2 x double> [[TMP2 ]]
23
23
;
24
24
%a.neg = fneg <6 x double > %a
25
25
%res = tail call fast <2 x double > @llvm.matrix.multiply.v2f64.v6f64.v3f64 (<6 x double > %a.neg , <3 x double > %b , i32 2 , i32 3 , i32 1 )
26
26
ret <2 x double > %res
27
27
}
28
28
29
; The nnan flag on the multiply should be preserved: the expected output is a
; `call nnan` multiply on the un-negated operand followed by an `fneg nnan` of
; the 2-element result.
+ define <2 x double > @test_negation_move_to_result_with_nnan_flag (<6 x double > %a , <3 x double > %b ) {
30
+ ; CHECK-LABEL: @test_negation_move_to_result_with_nnan_flag(
31
+ ; CHECK-NEXT: [[TMP1:%.*]] = call nnan <2 x double> @llvm.matrix.multiply.v2f64.v6f64.v3f64(<6 x double> [[A:%.*]], <3 x double> [[B:%.*]], i32 2, i32 3, i32 1)
32
+ ; CHECK-NEXT: [[TMP2:%.*]] = fneg nnan <2 x double> [[TMP1]]
33
+ ; CHECK-NEXT: ret <2 x double> [[TMP2]]
34
+ ;
35
+ %a.neg = fneg <6 x double > %a
36
+ %res = tail call nnan <2 x double > @llvm.matrix.multiply.v2f64.v6f64.v3f64 (<6 x double > %a.neg , <3 x double > %b , i32 2 , i32 3 , i32 1 )
37
+ ret <2 x double > %res
38
+ }
39
+
40
; The nsz flag on the multiply should be preserved: the expected output is a
; `call nsz` multiply on the un-negated operand followed by an `fneg nsz` of
; the 2-element result.
+ define <2 x double > @test_negation_move_to_result_with_nsz_flag (<6 x double > %a , <3 x double > %b ) {
41
+ ; CHECK-LABEL: @test_negation_move_to_result_with_nsz_flag(
42
+ ; CHECK-NEXT: [[TMP1:%.*]] = call nsz <2 x double> @llvm.matrix.multiply.v2f64.v6f64.v3f64(<6 x double> [[A:%.*]], <3 x double> [[B:%.*]], i32 2, i32 3, i32 1)
43
+ ; CHECK-NEXT: [[TMP2:%.*]] = fneg nsz <2 x double> [[TMP1]]
44
+ ; CHECK-NEXT: ret <2 x double> [[TMP2]]
45
+ ;
46
+ %a.neg = fneg <6 x double > %a
47
+ %res = tail call nsz <2 x double > @llvm.matrix.multiply.v2f64.v6f64.v3f64 (<6 x double > %a.neg , <3 x double > %b , i32 2 , i32 3 , i32 1 )
48
+ ret <2 x double > %res
49
+ }
50
+
51
; Here the fast flag is only on the original fneg, not on the multiply. The
; expected output carries no fast-math flags on either the multiply or the
; fneg that is moved to the 2-element result.
+ define <2 x double > @test_negation_move_to_result_with_fastflag_on_negation (<6 x double > %a , <3 x double > %b ) {
52
+ ; CHECK-LABEL: @test_negation_move_to_result_with_fastflag_on_negation(
53
+ ; CHECK-NEXT: [[TMP1:%.*]] = call <2 x double> @llvm.matrix.multiply.v2f64.v6f64.v3f64(<6 x double> [[A:%.*]], <3 x double> [[B:%.*]], i32 2, i32 3, i32 1)
54
+ ; CHECK-NEXT: [[TMP2:%.*]] = fneg <2 x double> [[TMP1]]
55
+ ; CHECK-NEXT: ret <2 x double> [[TMP2]]
56
+ ;
57
+ %a.neg = fneg fast<6 x double > %a
58
+ %res = tail call <2 x double > @llvm.matrix.multiply.v2f64.v6f64.v3f64 (<6 x double > %a.neg , <3 x double > %b , i32 2 , i32 3 , i32 1 )
59
+ ret <2 x double > %res
60
+ }
61
+
29
62
; %b has the fewest vector elements between the result and the two operands so the negation can be moved there
30
63
define <9 x double > @test_move_negation_to_second_operand (<27 x double > %a , <3 x double > %b ) {
31
64
; CHECK-LABEL: @test_move_negation_to_second_operand(
32
- ; CHECK-NEXT: [[A_NEG :%.*]] = fneg <27 x double> [[A :%.*]]
33
- ; CHECK-NEXT: [[RES:%.*]] = tail call <9 x double> @llvm.matrix.multiply.v9f64.v27f64.v3f64(<27 x double> [[A_NEG ]], <3 x double> [[B:%.* ]], i32 9, i32 3, i32 1)
65
+ ; CHECK-NEXT: [[TMP1 :%.*]] = fneg <3 x double> [[B :%.*]]
66
+ ; CHECK-NEXT: [[RES:%.*]] = tail call <9 x double> @llvm.matrix.multiply.v9f64.v27f64.v3f64(<27 x double> [[A:%.* ]], <3 x double> [[TMP1 ]], i32 9, i32 3, i32 1)
34
67
; CHECK-NEXT: ret <9 x double> [[RES]]
35
68
;
36
69
%a.neg = fneg <27 x double > %a
@@ -42,8 +75,8 @@ define <9 x double> @test_move_negation_to_second_operand(<27 x double> %a, <3 x
42
75
; Fast flag should be preserved
43
76
define <9 x double > @test_move_negation_to_second_operand_with_fast_flags (<27 x double > %a , <3 x double > %b ) {
44
77
; CHECK-LABEL: @test_move_negation_to_second_operand_with_fast_flags(
45
- ; CHECK-NEXT: [[A_NEG :%.*]] = fneg <27 x double> [[A :%.*]]
46
- ; CHECK-NEXT: [[RES:%.*]] = tail call fast <9 x double> @llvm.matrix.multiply.v9f64.v27f64.v3f64(<27 x double> [[A_NEG ]], <3 x double> [[B:%.* ]], i32 9, i32 3, i32 1)
78
+ ; CHECK-NEXT: [[TMP1 :%.*]] = fneg <3 x double> [[B :%.*]]
79
+ ; CHECK-NEXT: [[RES:%.*]] = tail call fast <9 x double> @llvm.matrix.multiply.v9f64.v27f64.v3f64(<27 x double> [[A:%.* ]], <3 x double> [[TMP1 ]], i32 9, i32 3, i32 1)
47
80
; CHECK-NEXT: ret <9 x double> [[RES]]
48
81
;
49
82
%a.neg = fneg <27 x double > %a
@@ -54,9 +87,9 @@ define <9 x double> @test_move_negation_to_second_operand_with_fast_flags(<27 x
54
87
; The result has the fewest vector elements between the result and the two operands so the negation can be moved there
55
88
define <2 x double > @test_negation_move_to_result_from_second_operand (<3 x double > %a , <6 x double > %b ){
56
89
; CHECK-LABEL: @test_negation_move_to_result_from_second_operand(
57
- ; CHECK-NEXT: [[B_NEG :%.*]] = fneg < 6 x double> [[B:%.*]]
58
- ; CHECK-NEXT: [[RES :%.*]] = tail call <2 x double> @llvm.matrix.multiply.v2f64.v3f64.v6f64(<3 x double> [[A:%.*]], <6 x double> [[B_NEG]], i32 1, i32 3, i32 2)
59
- ; CHECK-NEXT: ret <2 x double> [[RES ]]
90
+ ; CHECK-NEXT: [[TMP1 :%.*]] = call <2 x double> @llvm.matrix.multiply.v2f64.v3f64.v6f64(<3 x double> [[A:%.*]], < 6 x double> [[B:%.*]], i32 1, i32 3, i32 2)
91
+ ; CHECK-NEXT: [[TMP2 :%.*]] = fneg <2 x double> [[TMP1]]
92
+ ; CHECK-NEXT: ret <2 x double> [[TMP2 ]]
60
93
;
61
94
%b.neg = fneg <6 x double > %b
62
95
%res = tail call <2 x double > @llvm.matrix.multiply.v2f64.v3f64.v6f64 (<3 x double > %a , <6 x double > %b.neg , i32 1 , i32 3 , i32 2 )
@@ -66,8 +99,8 @@ define <2 x double> @test_negation_move_to_result_from_second_operand(<3 x doubl
66
99
; %a has the fewest vector elements between the result and the two operands so the negation can be moved there
67
100
define <9 x double > @test_move_negation_to_first_operand (<3 x double > %a , <27 x double > %b ) {
68
101
; CHECK-LABEL: @test_move_negation_to_first_operand(
69
- ; CHECK-NEXT: [[B_NEG :%.*]] = fneg <27 x double> [[B :%.*]]
70
- ; CHECK-NEXT: [[RES:%.*]] = tail call <9 x double> @llvm.matrix.multiply.v9f64.v3f64.v27f64(<3 x double> [[A:%.* ]], <27 x double> [[B_NEG ]], i32 1, i32 3, i32 9)
102
+ ; CHECK-NEXT: [[TMP1 :%.*]] = fneg <3 x double> [[A :%.*]]
103
+ ; CHECK-NEXT: [[RES:%.*]] = tail call <9 x double> @llvm.matrix.multiply.v9f64.v3f64.v27f64(<3 x double> [[TMP1 ]], <27 x double> [[B:%.* ]], i32 1, i32 3, i32 9)
71
104
; CHECK-NEXT: ret <9 x double> [[RES]]
72
105
;
73
106
%b.neg = fneg <27 x double > %b
@@ -172,9 +205,10 @@ define <4 x double> @matrix_multiply_two_operands_negated_with_same_size(<2 x do
172
205
173
206
define <2 x double > @matrix_multiply_two_operands_with_multiple_uses (<6 x double > %a , <3 x double > %b ) {
174
207
; CHECK-LABEL: @matrix_multiply_two_operands_with_multiple_uses(
175
- ; CHECK-NEXT: [[RES:%.*]] = tail call <2 x double> @llvm.matrix.multiply.v2f64.v6f64.v3f64(<6 x double> [[A:%.*]], <3 x double> [[B:%.*]], i32 2, i32 3, i32 1)
176
- ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <6 x double> [[A]], <6 x double> poison, <2 x i32> <i32 0, i32 1>
177
- ; CHECK-NEXT: [[RES_3:%.*]] = fsub <2 x double> [[RES]], [[TMP1]]
208
+ ; CHECK-NEXT: [[A_NEG:%.*]] = fneg <6 x double> [[A:%.*]]
209
+ ; CHECK-NEXT: [[RES:%.*]] = tail call <2 x double> @llvm.matrix.multiply.v2f64.v6f64.v3f64(<6 x double> [[A]], <3 x double> [[B:%.*]], i32 2, i32 3, i32 1)
210
+ ; CHECK-NEXT: [[RES_2:%.*]] = shufflevector <6 x double> [[A_NEG]], <6 x double> undef, <2 x i32> <i32 0, i32 1>
211
+ ; CHECK-NEXT: [[RES_3:%.*]] = fadd <2 x double> [[RES_2]], [[RES]]
178
212
; CHECK-NEXT: ret <2 x double> [[RES_3]]
179
213
;
180
214
%a.neg = fneg <6 x double > %a
@@ -234,8 +268,8 @@ define <12 x double> @fneg_with_multiple_uses_2(<15 x double> %a, <20 x double>
234
268
; negation should be moved to the second operand given it has the fewest vector elements
235
269
define <72 x double > @chain_of_matrix_mutliplies (<27 x double > %a , <3 x double > %b , <8 x double > %c ) {
236
270
; CHECK-LABEL: @chain_of_matrix_mutliplies(
237
- ; CHECK-NEXT: [[A_NEG :%.*]] = fneg <27 x double> [[A :%.*]]
238
- ; CHECK-NEXT: [[RES:%.*]] = tail call <9 x double> @llvm.matrix.multiply.v9f64.v27f64.v3f64(<27 x double> [[A_NEG ]], <3 x double> [[B:%.* ]], i32 9, i32 3, i32 1)
271
+ ; CHECK-NEXT: [[TMP1 :%.*]] = fneg <3 x double> [[B :%.*]]
272
+ ; CHECK-NEXT: [[RES:%.*]] = tail call <9 x double> @llvm.matrix.multiply.v9f64.v27f64.v3f64(<27 x double> [[A:%.* ]], <3 x double> [[TMP1 ]], i32 9, i32 3, i32 1)
239
273
; CHECK-NEXT: [[RES_2:%.*]] = tail call <72 x double> @llvm.matrix.multiply.v72f64.v9f64.v8f64(<9 x double> [[RES]], <8 x double> [[C:%.*]], i32 9, i32 1, i32 8)
240
274
; CHECK-NEXT: ret <72 x double> [[RES_2]]
241
275
;
@@ -249,11 +283,11 @@ define <72 x double> @chain_of_matrix_mutliplies(<27 x double> %a, <3 x double>
249
283
; second negation should be moved to the result of the second multiplication
250
284
define <6 x double > @chain_of_matrix_mutliplies_with_two_negations (<3 x double > %a , <5 x double > %b , <10 x double > %c ) {
251
285
; CHECK-LABEL: @chain_of_matrix_mutliplies_with_two_negations(
252
- ; CHECK-NEXT: [[B_NEG :%.*]] = fneg <5 x double> [[B :%.*]]
253
- ; CHECK-NEXT: [[RES:%.*]] = tail call <15 x double> @llvm.matrix.multiply.v15f64.v3f64.v5f64(<3 x double> [[A:%.* ]], <5 x double> [[B_NEG ]], i32 3, i32 1, i32 5)
254
- ; CHECK-NEXT: [[RES_NEG :%.*]] = fneg < 15 x double> [[RES]]
255
- ; CHECK-NEXT: [[RES_2 :%.*]] = tail call <6 x double> @llvm.matrix.multiply.v6f64.v15f64.v10f64(<15 x double> [[RES_NEG]], <10 x double> [[C:%.*]], i32 3, i32 5, i32 2)
256
- ; CHECK-NEXT: ret <6 x double> [[RES_2 ]]
286
+ ; CHECK-NEXT: [[TMP1 :%.*]] = fneg <3 x double> [[A :%.*]]
287
+ ; CHECK-NEXT: [[RES:%.*]] = tail call <15 x double> @llvm.matrix.multiply.v15f64.v3f64.v5f64(<3 x double> [[TMP1 ]], <5 x double> [[B:%.* ]], i32 3, i32 1, i32 5)
288
+ ; CHECK-NEXT: [[TMP2 :%.*]] = call <6 x double> @llvm.matrix.multiply.v6f64.v15f64.v10f64(< 15 x double> [[RES]], <10 x double> [[C:%.*]], i32 3, i32 5, i32 2)
289
+ ; CHECK-NEXT: [[TMP3 :%.*]] = fneg <6 x double> [[TMP2]]
290
+ ; CHECK-NEXT: ret <6 x double> [[TMP3 ]]
257
291
;
258
292
%b.neg = fneg <5 x double > %b
259
293
%res = tail call <15 x double > @llvm.matrix.multiply.v15f64.v3f64.v5f64 (<3 x double > %a , <5 x double > %b.neg , i32 3 , i32 1 , i32 5 )
@@ -265,10 +299,10 @@ define <6 x double> @chain_of_matrix_mutliplies_with_two_negations(<3 x double>
265
299
; negation should be propagated to the result of the second matrix multiplication
266
300
define <6 x double > @chain_of_matrix_mutliplies_propagation (<15 x double > %a , <20 x double > %b , <8 x double > %c ){
267
301
; CHECK-LABEL: @chain_of_matrix_mutliplies_propagation(
268
- ; CHECK-NEXT: [[A_NEG :%.*]] = fneg < 15 x double> [[A:%.*]]
269
- ; CHECK-NEXT: [[RES :%.*]] = tail call <12 x double> @llvm.matrix.multiply.v12f64.v15f64.v20f64(<15 x double> [[A_NEG ]], <20 x double> [[B :%.*]], i32 3, i32 5 , i32 4 )
270
- ; CHECK-NEXT: [[RES_2 :%.*]] = tail call <6 x double> @llvm.matrix.multiply.v6f64.v12f64.v8f64(<12 x double> [[RES]], <8 x double> [[C:%.*]], i32 3, i32 4, i32 2)
271
- ; CHECK-NEXT: ret <6 x double> [[RES_2 ]]
302
+ ; CHECK-NEXT: [[TMP1 :%.*]] = call <12 x double> @llvm.matrix.multiply.v12f64.v15f64.v20f64(< 15 x double> [[A:%.*]], <20 x double> [[B:%.*]], i32 3, i32 5, i32 4)
303
+ ; CHECK-NEXT: [[TMP2 :%.*]] = call <6 x double> @llvm.matrix.multiply.v6f64. v12f64.v8f64(<12 x double> [[TMP1 ]], <8 x double> [[C :%.*]], i32 3, i32 4 , i32 2 )
304
+ ; CHECK-NEXT: [[TMP3 :%.*]] = fneg <6 x double> [[TMP2]]
305
+ ; CHECK-NEXT: ret <6 x double> [[TMP3 ]]
272
306
;
273
307
%a.neg = fneg <15 x double > %a
274
308
%res = tail call <12 x double > @llvm.matrix.multiply.v12f64.v15f64.v20f64 (<15 x double > %a.neg , <20 x double > %b , i32 3 , i32 5 , i32 4 )
0 commit comments