Commit 56cc491

Zain Jaffal authored and fhahn committed
[InstCombine] Test for matrix multiplication negation optimisation.
If one of the operands is negated in a multiplication, we can optimise the operation by moving the negation to the operand with the fewest vector elements, or to the result.

Reviewed By: fhahn

Differential Revision: https://reviews.llvm.org/D133287
1 parent b971920 commit 56cc491
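For context, a minimal before/after sketch of the fold these tests exercise, assuming the optimisation proposed in D133287 (the intrinsic signature matches the declarations in the test below; the rewritten form is illustrative only and not part of this commit):

; Before: the fneg sits on the <6 x double> operand.
%a.neg = fneg <6 x double> %a
%res = call <2 x double> @llvm.matrix.multiply.v2f64.v6f64.v3f64(<6 x double> %a.neg, <3 x double> %b, i32 2, i32 3, i32 1)

; After (sketch): since (-A) x B == -(A x B), the negation can instead be applied
; to the <2 x double> result, which has the fewest elements of the three values involved.
%m = call <2 x double> @llvm.matrix.multiply.v2f64.v6f64.v3f64(<6 x double> %a, <3 x double> %b, i32 2, i32 3, i32 1)
%res = fneg <2 x double> %m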

1 file changed: +302 −0 lines changed
@@ -0,0 +1,302 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -passes=instcombine -S | FileCheck %s

; The result has the fewest vector elements between the result and the two operands so the negation can be moved there
define <2 x double> @test_negation_move_to_result(<6 x double> %a, <3 x double> %b) {
; CHECK-LABEL: @test_negation_move_to_result(
; CHECK-NEXT: [[A_NEG:%.*]] = fneg <6 x double> [[A:%.*]]
; CHECK-NEXT: [[RES:%.*]] = tail call <2 x double> @llvm.matrix.multiply.v2f64.v6f64.v3f64(<6 x double> [[A_NEG]], <3 x double> [[B:%.*]], i32 2, i32 3, i32 1)
; CHECK-NEXT: ret <2 x double> [[RES]]
;
%a.neg = fneg <6 x double> %a
%res = tail call <2 x double> @llvm.matrix.multiply.v2f64.v6f64.v3f64(<6 x double> %a.neg, <3 x double> %b, i32 2, i32 3, i32 1)
ret <2 x double> %res
}

; The result has the fewest vector elements between the result and the two operands so the negation can be moved there
; Fast flag should be preserved
define <2 x double> @test_negation_move_to_result_with_fastflags(<6 x double> %a, <3 x double> %b) {
; CHECK-LABEL: @test_negation_move_to_result_with_fastflags(
; CHECK-NEXT: [[A_NEG:%.*]] = fneg <6 x double> [[A:%.*]]
; CHECK-NEXT: [[RES:%.*]] = tail call fast <2 x double> @llvm.matrix.multiply.v2f64.v6f64.v3f64(<6 x double> [[A_NEG]], <3 x double> [[B:%.*]], i32 2, i32 3, i32 1)
; CHECK-NEXT: ret <2 x double> [[RES]]
;
%a.neg = fneg <6 x double> %a
%res = tail call fast <2 x double> @llvm.matrix.multiply.v2f64.v6f64.v3f64(<6 x double> %a.neg, <3 x double> %b, i32 2, i32 3, i32 1)
ret <2 x double> %res
}

; %b has the fewest vector elements between the result and the two operands so the negation can be moved there
define <9 x double> @test_move_negation_to_second_operand(<27 x double> %a, <3 x double> %b) {
; CHECK-LABEL: @test_move_negation_to_second_operand(
; CHECK-NEXT: [[A_NEG:%.*]] = fneg <27 x double> [[A:%.*]]
; CHECK-NEXT: [[RES:%.*]] = tail call <9 x double> @llvm.matrix.multiply.v9f64.v27f64.v3f64(<27 x double> [[A_NEG]], <3 x double> [[B:%.*]], i32 9, i32 3, i32 1)
; CHECK-NEXT: ret <9 x double> [[RES]]
;
%a.neg = fneg <27 x double> %a
%res = tail call <9 x double> @llvm.matrix.multiply.v9f64.v27f64.v3f64(<27 x double> %a.neg, <3 x double> %b, i32 9, i32 3, i32 1)
ret <9 x double> %res
}

; %b has the fewest vector elements between the result and the two operands so the negation can be moved there
; Fast flag should be preserved
define <9 x double> @test_move_negation_to_second_operand_with_fast_flags(<27 x double> %a, <3 x double> %b) {
; CHECK-LABEL: @test_move_negation_to_second_operand_with_fast_flags(
; CHECK-NEXT: [[A_NEG:%.*]] = fneg <27 x double> [[A:%.*]]
; CHECK-NEXT: [[RES:%.*]] = tail call fast <9 x double> @llvm.matrix.multiply.v9f64.v27f64.v3f64(<27 x double> [[A_NEG]], <3 x double> [[B:%.*]], i32 9, i32 3, i32 1)
; CHECK-NEXT: ret <9 x double> [[RES]]
;
%a.neg = fneg <27 x double> %a
%res = tail call fast <9 x double> @llvm.matrix.multiply.v9f64.v27f64.v3f64(<27 x double> %a.neg, <3 x double> %b, i32 9, i32 3, i32 1)
ret <9 x double> %res
}

; The result has the fewest vector elements between the result and the two operands so the negation can be moved there
define <2 x double> @test_negation_move_to_result_from_second_operand(<3 x double> %a, <6 x double> %b){
; CHECK-LABEL: @test_negation_move_to_result_from_second_operand(
; CHECK-NEXT: [[B_NEG:%.*]] = fneg <6 x double> [[B:%.*]]
; CHECK-NEXT: [[RES:%.*]] = tail call <2 x double> @llvm.matrix.multiply.v2f64.v3f64.v6f64(<3 x double> [[A:%.*]], <6 x double> [[B_NEG]], i32 1, i32 3, i32 2)
; CHECK-NEXT: ret <2 x double> [[RES]]
;
%b.neg = fneg <6 x double> %b
%res = tail call <2 x double> @llvm.matrix.multiply.v2f64.v3f64.v6f64(<3 x double> %a, <6 x double> %b.neg, i32 1, i32 3, i32 2)
ret <2 x double> %res
}

; %a has the fewest vector elements between the result and the two operands so the negation can be moved there
define <9 x double> @test_move_negation_to_first_operand(<3 x double> %a, <27 x double> %b) {
; CHECK-LABEL: @test_move_negation_to_first_operand(
; CHECK-NEXT: [[B_NEG:%.*]] = fneg <27 x double> [[B:%.*]]
; CHECK-NEXT: [[RES:%.*]] = tail call <9 x double> @llvm.matrix.multiply.v9f64.v3f64.v27f64(<3 x double> [[A:%.*]], <27 x double> [[B_NEG]], i32 1, i32 3, i32 9)
; CHECK-NEXT: ret <9 x double> [[RES]]
;
%b.neg = fneg <27 x double> %b
%res = tail call <9 x double> @llvm.matrix.multiply.v9f64.v3f64.v27f64(<3 x double> %a, <27 x double> %b.neg, i32 1, i32 3, i32 9)
ret <9 x double> %res
}

; %a has the fewest vector elements between the result and the two operands so the negation is not moved
define <15 x double> @test_negation_not_moved(<3 x double> %a, <5 x double> %b) {
; CHECK-LABEL: @test_negation_not_moved(
; CHECK-NEXT: [[A_NEG:%.*]] = fneg <3 x double> [[A:%.*]]
; CHECK-NEXT: [[RES:%.*]] = tail call <15 x double> @llvm.matrix.multiply.v15f64.v3f64.v5f64(<3 x double> [[A_NEG]], <5 x double> [[B:%.*]], i32 3, i32 1, i32 5)
; CHECK-NEXT: ret <15 x double> [[RES]]
;
%a.neg = fneg <3 x double> %a
%res = tail call <15 x double> @llvm.matrix.multiply.v15f64.v3f64.v5f64(<3 x double> %a.neg, <5 x double> %b, i32 3, i32 1, i32 5)
ret <15 x double> %res
}

; %b has the fewest vector elements between the result and the two operands so the negation is not moved
define <15 x double> @test_negation_not_moved_second_operand(<5 x double> %a, <3 x double> %b) {
; CHECK-LABEL: @test_negation_not_moved_second_operand(
; CHECK-NEXT: [[B_NEG:%.*]] = fneg <3 x double> [[B:%.*]]
; CHECK-NEXT: [[RES:%.*]] = tail call <15 x double> @llvm.matrix.multiply.v15f64.v5f64.v3f64(<5 x double> [[A:%.*]], <3 x double> [[B_NEG]], i32 5, i32 1, i32 3)
; CHECK-NEXT: ret <15 x double> [[RES]]
;
%b.neg = fneg <3 x double> %b
%res = tail call <15 x double> @llvm.matrix.multiply.v15f64.v5f64.v3f64(<5 x double> %a, <3 x double> %b.neg, i32 5, i32 1, i32 3)
ret <15 x double> %res
}

; the negation should be moved from the result to operand %a because it has the smallest vector element count
define <15 x double> @test_negation_on_result(<3 x double> %a, <5 x double> %b) {
; CHECK-LABEL: @test_negation_on_result(
; CHECK-NEXT: [[RES:%.*]] = tail call <15 x double> @llvm.matrix.multiply.v15f64.v3f64.v5f64(<3 x double> [[A:%.*]], <5 x double> [[B:%.*]], i32 3, i32 1, i32 5)
; CHECK-NEXT: [[RES_2:%.*]] = fneg <15 x double> [[RES]]
; CHECK-NEXT: ret <15 x double> [[RES_2]]
;
%res = tail call <15 x double> @llvm.matrix.multiply.v15f64.v3f64.v5f64(<3 x double> %a, <5 x double> %b, i32 3, i32 1, i32 5)
%res.2 = fneg <15 x double> %res
ret <15 x double> %res.2
}

; both negations can be deleted
define <2 x double> @test_with_two_operands_negated1(<6 x double> %a, <3 x double> %b){
; CHECK-LABEL: @test_with_two_operands_negated1(
; CHECK-NEXT: [[A_NEG:%.*]] = fneg <6 x double> [[A:%.*]]
; CHECK-NEXT: [[B_NEG:%.*]] = fneg <3 x double> [[B:%.*]]
; CHECK-NEXT: [[RES:%.*]] = tail call <2 x double> @llvm.matrix.multiply.v2f64.v6f64.v3f64(<6 x double> [[A_NEG]], <3 x double> [[B_NEG]], i32 2, i32 3, i32 1)
; CHECK-NEXT: ret <2 x double> [[RES]]
;
%a.neg = fneg <6 x double> %a
%b.neg = fneg <3 x double> %b
%res = tail call <2 x double> @llvm.matrix.multiply.v2f64.v6f64.v3f64(<6 x double> %a.neg, <3 x double> %b.neg, i32 2, i32 3, i32 1)
ret <2 x double> %res
}

; both negations will be removed
define <9 x double> @test_with_two_operands_negated2(<27 x double> %a, <3 x double> %b){
; CHECK-LABEL: @test_with_two_operands_negated2(
; CHECK-NEXT: [[A_NEG:%.*]] = fneg <27 x double> [[A:%.*]]
; CHECK-NEXT: [[B_NEG:%.*]] = fneg <3 x double> [[B:%.*]]
; CHECK-NEXT: [[RES:%.*]] = tail call <9 x double> @llvm.matrix.multiply.v9f64.v27f64.v3f64(<27 x double> [[A_NEG]], <3 x double> [[B_NEG]], i32 9, i32 3, i32 1)
; CHECK-NEXT: ret <9 x double> [[RES]]
;
%a.neg = fneg <27 x double> %a
%b.neg = fneg <3 x double> %b
%res = tail call <9 x double> @llvm.matrix.multiply.v9f64.v27f64.v3f64(<27 x double> %a.neg, <3 x double> %b.neg, i32 9, i32 3, i32 1)
ret <9 x double> %res
}

; both negations will be removed
define <9 x double> @test_with_two_operands_negated_with_fastflags(<27 x double> %a, <3 x double> %b){
; CHECK-LABEL: @test_with_two_operands_negated_with_fastflags(
; CHECK-NEXT: [[A_NEG:%.*]] = fneg <27 x double> [[A:%.*]]
; CHECK-NEXT: [[B_NEG:%.*]] = fneg <3 x double> [[B:%.*]]
; CHECK-NEXT: [[RES:%.*]] = tail call fast <9 x double> @llvm.matrix.multiply.v9f64.v27f64.v3f64(<27 x double> [[A_NEG]], <3 x double> [[B_NEG]], i32 9, i32 3, i32 1)
; CHECK-NEXT: ret <9 x double> [[RES]]
;
%a.neg = fneg <27 x double> %a
%b.neg = fneg <3 x double> %b
%res = tail call fast <9 x double> @llvm.matrix.multiply.v9f64.v27f64.v3f64(<27 x double> %a.neg, <3 x double> %b.neg, i32 9, i32 3, i32 1)
ret <9 x double> %res
}

; both negations should be removed
define <9 x double> @test_with_two_operands_negated2_commute(<3 x double> %a, <27 x double> %b){
; CHECK-LABEL: @test_with_two_operands_negated2_commute(
; CHECK-NEXT: [[A_NEG:%.*]] = fneg <3 x double> [[A:%.*]]
; CHECK-NEXT: [[B_NEG:%.*]] = fneg <27 x double> [[B:%.*]]
; CHECK-NEXT: [[RES:%.*]] = call <9 x double> @llvm.matrix.multiply.v9f64.v3f64.v27f64(<3 x double> [[A_NEG]], <27 x double> [[B_NEG]], i32 1, i32 3, i32 9)
; CHECK-NEXT: ret <9 x double> [[RES]]
;
%a.neg = fneg <3 x double> %a
%b.neg = fneg <27 x double> %b
%res = call <9 x double> @llvm.matrix.multiply.v9f64.v3f64.v27f64(<3 x double> %a.neg, <27 x double> %b.neg, i32 1, i32 3, i32 9)
ret <9 x double> %res
}

define <4 x double> @matrix_multiply_two_operands_negated_with_same_size(<2 x double> %a, <2 x double> %b) {
; CHECK-LABEL: @matrix_multiply_two_operands_negated_with_same_size(
; CHECK-NEXT: [[A_NEG:%.*]] = fneg <2 x double> [[A:%.*]]
; CHECK-NEXT: [[B_NEG:%.*]] = fneg <2 x double> [[B:%.*]]
; CHECK-NEXT: [[RES:%.*]] = call <4 x double> @llvm.matrix.multiply.v4f64.v2f64.v2f64(<2 x double> [[A_NEG]], <2 x double> [[B_NEG]], i32 2, i32 1, i32 2)
; CHECK-NEXT: ret <4 x double> [[RES]]
;
%a.neg = fneg <2 x double> %a
%b.neg = fneg <2 x double> %b
%res = call <4 x double> @llvm.matrix.multiply.v4f64.v2f64.v2f64(<2 x double> %a.neg, <2 x double> %b.neg, i32 2, i32 1, i32 2)
ret <4 x double> %res
}

define <2 x double> @matrix_multiply_two_operands_with_multiple_uses(<6 x double> %a, <3 x double> %b) {
; CHECK-LABEL: @matrix_multiply_two_operands_with_multiple_uses(
; CHECK-NEXT: [[A_NEG:%.*]] = fneg <6 x double> [[A:%.*]]
; CHECK-NEXT: [[B_NEG:%.*]] = fneg <3 x double> [[B:%.*]]
; CHECK-NEXT: [[RES:%.*]] = tail call <2 x double> @llvm.matrix.multiply.v2f64.v6f64.v3f64(<6 x double> [[A_NEG]], <3 x double> [[B_NEG]], i32 2, i32 3, i32 1)
; CHECK-NEXT: [[RES_2:%.*]] = shufflevector <6 x double> [[A_NEG]], <6 x double> undef, <2 x i32> <i32 0, i32 1>
; CHECK-NEXT: [[RES_3:%.*]] = fadd <2 x double> [[RES_2]], [[RES]]
; CHECK-NEXT: ret <2 x double> [[RES_3]]
;
%a.neg = fneg <6 x double> %a
%b.neg = fneg <3 x double> %b
%res = tail call <2 x double> @llvm.matrix.multiply.v2f64.v6f64.v3f64(<6 x double> %a.neg, <3 x double> %b.neg, i32 2, i32 3, i32 1)
%res.2 = shufflevector <6 x double> %a.neg, <6 x double> undef,
<2 x i32> <i32 0, i32 1>
%res.3 = fadd <2 x double> %res.2, %res
ret <2 x double> %res.3
}

define <9 x double> @matrix_multiply_two_operands_with_multiple_uses2(<27 x double> %a, <3 x double> %b, ptr %a_loc, ptr %b_loc){
; CHECK-LABEL: @matrix_multiply_two_operands_with_multiple_uses2(
; CHECK-NEXT: [[A_NEG:%.*]] = fneg <27 x double> [[A:%.*]]
; CHECK-NEXT: [[B_NEG:%.*]] = fneg <3 x double> [[B:%.*]]
; CHECK-NEXT: [[RES:%.*]] = tail call <9 x double> @llvm.matrix.multiply.v9f64.v27f64.v3f64(<27 x double> [[A_NEG]], <3 x double> [[B_NEG]], i32 9, i32 3, i32 1)
; CHECK-NEXT: store <27 x double> [[A_NEG]], ptr [[A_LOC:%.*]], align 256
; CHECK-NEXT: store <3 x double> [[B_NEG]], ptr [[B_LOC:%.*]], align 32
; CHECK-NEXT: ret <9 x double> [[RES]]
;
%a.neg = fneg <27 x double> %a
%b.neg = fneg <3 x double> %b
%res = tail call <9 x double> @llvm.matrix.multiply.v9f64.v27f64.v3f64(<27 x double> %a.neg, <3 x double> %b.neg, i32 9, i32 3, i32 1)
store <27 x double> %a.neg, ptr %a_loc
store <3 x double> %b.neg, ptr %b_loc
ret <9 x double> %res
}

define <12 x double> @fneg_with_multiple_uses(<15 x double> %a, <20 x double> %b){
; CHECK-LABEL: @fneg_with_multiple_uses(
; CHECK-NEXT: [[A_NEG:%.*]] = fneg <15 x double> [[A:%.*]]
; CHECK-NEXT: [[RES:%.*]] = tail call <12 x double> @llvm.matrix.multiply.v12f64.v15f64.v20f64(<15 x double> [[A_NEG]], <20 x double> [[B:%.*]], i32 3, i32 5, i32 4)
; CHECK-NEXT: [[RES_2:%.*]] = shufflevector <15 x double> [[A_NEG]], <15 x double> undef, <12 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
; CHECK-NEXT: [[RES_3:%.*]] = fadd <12 x double> [[RES_2]], [[RES]]
; CHECK-NEXT: ret <12 x double> [[RES_3]]
;
%a.neg = fneg <15 x double> %a
%res = tail call <12 x double> @llvm.matrix.multiply.v12f64.v15f64.v20f64(<15 x double> %a.neg, <20 x double> %b, i32 3, i32 5, i32 4)
%res.2 = shufflevector <15 x double> %a.neg, <15 x double> undef,
<12 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
%res.3 = fadd <12 x double> %res.2, %res
ret <12 x double> %res.3
}

define <12 x double> @fneg_with_multiple_uses_2(<15 x double> %a, <20 x double> %b, ptr %a_loc){
; CHECK-LABEL: @fneg_with_multiple_uses_2(
; CHECK-NEXT: [[A_NEG:%.*]] = fneg <15 x double> [[A:%.*]]
; CHECK-NEXT: [[RES:%.*]] = tail call <12 x double> @llvm.matrix.multiply.v12f64.v15f64.v20f64(<15 x double> [[A_NEG]], <20 x double> [[B:%.*]], i32 3, i32 5, i32 4)
; CHECK-NEXT: store <15 x double> [[A_NEG]], ptr [[A_LOC:%.*]], align 128
; CHECK-NEXT: ret <12 x double> [[RES]]
;
%a.neg = fneg <15 x double> %a
%res = tail call <12 x double> @llvm.matrix.multiply.v12f64.v15f64.v20f64(<15 x double> %a.neg, <20 x double> %b, i32 3, i32 5, i32 4)
store <15 x double> %a.neg, ptr %a_loc
ret <12 x double> %res
}
; negation should be moved to the second operand given it has the fewest vector elements
define <72 x double> @chain_of_matrix_mutliplies(<27 x double> %a, <3 x double> %b, <8 x double> %c) {
; CHECK-LABEL: @chain_of_matrix_mutliplies(
; CHECK-NEXT: [[A_NEG:%.*]] = fneg <27 x double> [[A:%.*]]
; CHECK-NEXT: [[RES:%.*]] = tail call <9 x double> @llvm.matrix.multiply.v9f64.v27f64.v3f64(<27 x double> [[A_NEG]], <3 x double> [[B:%.*]], i32 9, i32 3, i32 1)
; CHECK-NEXT: [[RES_2:%.*]] = tail call <72 x double> @llvm.matrix.multiply.v72f64.v9f64.v8f64(<9 x double> [[RES]], <8 x double> [[C:%.*]], i32 9, i32 1, i32 8)
; CHECK-NEXT: ret <72 x double> [[RES_2]]
;
%a.neg = fneg <27 x double> %a
%res = tail call <9 x double> @llvm.matrix.multiply.v9f64.v27f64.v3f64(<27 x double> %a.neg, <3 x double> %b, i32 9, i32 3, i32 1)
%res.2 = tail call <72 x double> @llvm.matrix.multiply.v72f64.v9f64.v8f64(<9 x double> %res, <8 x double> %c, i32 9, i32 1, i32 8)
ret <72 x double> %res.2
}

; first negation should be moved to %a
; second negation should be moved to the result of the second multiplication
define <6 x double> @chain_of_matrix_mutliplies_with_two_negations(<3 x double> %a, <5 x double> %b, <10 x double> %c) {
; CHECK-LABEL: @chain_of_matrix_mutliplies_with_two_negations(
; CHECK-NEXT: [[B_NEG:%.*]] = fneg <5 x double> [[B:%.*]]
; CHECK-NEXT: [[RES:%.*]] = tail call <15 x double> @llvm.matrix.multiply.v15f64.v3f64.v5f64(<3 x double> [[A:%.*]], <5 x double> [[B_NEG]], i32 3, i32 1, i32 5)
; CHECK-NEXT: [[RES_NEG:%.*]] = fneg <15 x double> [[RES]]
; CHECK-NEXT: [[RES_2:%.*]] = tail call <6 x double> @llvm.matrix.multiply.v6f64.v15f64.v10f64(<15 x double> [[RES_NEG]], <10 x double> [[C:%.*]], i32 3, i32 5, i32 2)
; CHECK-NEXT: ret <6 x double> [[RES_2]]
;
%b.neg = fneg <5 x double> %b
%res = tail call <15 x double> @llvm.matrix.multiply.v15f64.v3f64.v5f64(<3 x double> %a, <5 x double> %b.neg, i32 3, i32 1, i32 5)
%res.neg = fneg <15 x double> %res
%res.2 = tail call <6 x double> @llvm.matrix.multiply.v6f64.v15f64.v10f64(<15 x double> %res.neg, <10 x double> %c, i32 3, i32 5, i32 2)
ret <6 x double> %res.2
}

; negation should be propagated to the result of the second matrix multiplication
define <6 x double> @chain_of_matrix_mutliplies_propagation(<15 x double> %a, <20 x double> %b, <8 x double> %c){
; CHECK-LABEL: @chain_of_matrix_mutliplies_propagation(
; CHECK-NEXT: [[A_NEG:%.*]] = fneg <15 x double> [[A:%.*]]
; CHECK-NEXT: [[RES:%.*]] = tail call <12 x double> @llvm.matrix.multiply.v12f64.v15f64.v20f64(<15 x double> [[A_NEG]], <20 x double> [[B:%.*]], i32 3, i32 5, i32 4)
; CHECK-NEXT: [[RES_2:%.*]] = tail call <6 x double> @llvm.matrix.multiply.v6f64.v12f64.v8f64(<12 x double> [[RES]], <8 x double> [[C:%.*]], i32 3, i32 4, i32 2)
; CHECK-NEXT: ret <6 x double> [[RES_2]]
;
%a.neg = fneg <15 x double> %a
%res = tail call <12 x double> @llvm.matrix.multiply.v12f64.v15f64.v20f64(<15 x double> %a.neg, <20 x double> %b, i32 3, i32 5, i32 4)
%res.2 = tail call <6 x double> @llvm.matrix.multiply.v6f64.v12f64.v8f64(<12 x double> %res, <8 x double> %c, i32 3, i32 4, i32 2)
ret <6 x double> %res.2
}

declare <2 x double> @llvm.matrix.multiply.v2f64.v6f64.v3f64(<6 x double>, <3 x double>, i32 immarg, i32 immarg, i32 immarg) #1
declare <4 x double> @llvm.matrix.multiply.v4f64.v2f64.v2f64(<2 x double>, <2 x double>, i32 immarg, i32 immarg, i32 immarg) #1
declare <2 x double> @llvm.matrix.multiply.v2f64.v3f64.v6f64(<3 x double>, <6 x double>, i32 immarg, i32 immarg, i32 immarg) #1
declare <9 x double> @llvm.matrix.multiply.v9f64.v27f64.v3f64(<27 x double>, <3 x double>, i32 immarg, i32 immarg, i32 immarg) #1
declare <9 x double> @llvm.matrix.multiply.v9f64.v3f64.v27f64(<3 x double>, <27 x double>, i32 immarg, i32 immarg, i32 immarg)
declare <15 x double> @llvm.matrix.multiply.v15f64.v3f64.v5f64(<3 x double>, <5 x double>, i32 immarg, i32 immarg, i32 immarg) #1
declare <15 x double> @llvm.matrix.multiply.v15f64.v5f64.v3f64(<5 x double>, <3 x double>, i32 immarg, i32 immarg, i32 immarg) #1
declare <72 x double> @llvm.matrix.multiply.v72f64.v9f64.v8f64(<9 x double>, <8 x double>, i32 immarg, i32 immarg, i32 immarg) #1
declare <12 x double> @llvm.matrix.multiply.v12f64.v15f64.v20f64(<15 x double>, <20 x double>, i32 immarg, i32 immarg, i32 immarg) #1
declare <21 x double> @llvm.matrix.multiply.v21f64.v15f64.v35f64(<15 x double>, <35 x double>, i32 immarg, i32 immarg, i32 immarg) #1
declare <6 x double> @llvm.matrix.multiply.v6f64.v15f64.v10f64(<15 x double>, <10 x double>, i32 immarg, i32 immarg, i32 immarg) #1
declare <6 x double> @llvm.matrix.multiply.v6f64.v12f64.v8f64(<12 x double>, <8 x double>, i32 immarg, i32 immarg, i32 immarg) #1
