|
| 1 | +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py |
| 2 | +; RUN: opt < %s -passes=instcombine -S | FileCheck %s |
| 3 | + |
| 4 | +; The result has the fewest vector elements between the result and the two operands so the negation can be moved there |
| 5 | +define <2 x double> @test_negation_move_to_result(<6 x double> %a, <3 x double> %b) { |
| 6 | +; CHECK-LABEL: @test_negation_move_to_result( |
| 7 | +; CHECK-NEXT: [[A_NEG:%.*]] = fneg <6 x double> [[A:%.*]] |
| 8 | +; CHECK-NEXT: [[RES:%.*]] = tail call <2 x double> @llvm.matrix.multiply.v2f64.v6f64.v3f64(<6 x double> [[A_NEG]], <3 x double> [[B:%.*]], i32 2, i32 3, i32 1) |
| 9 | +; CHECK-NEXT: ret <2 x double> [[RES]] |
| 10 | +; |
| 11 | + %a.neg = fneg <6 x double> %a |
| 12 | + %res = tail call <2 x double> @llvm.matrix.multiply.v2f64.v6f64.v3f64(<6 x double> %a.neg, <3 x double> %b, i32 2, i32 3, i32 1) |
| 13 | + ret <2 x double> %res |
| 14 | +} |
| 15 | + |
| 16 | +; The result has the fewest vector elements between the result and the two operands so the negation can be moved there |
| 17 | +; Fast flag should be preserved |
| 18 | +define <2 x double> @test_negation_move_to_result_with_fastflags(<6 x double> %a, <3 x double> %b) { |
| 19 | +; CHECK-LABEL: @test_negation_move_to_result_with_fastflags( |
| 20 | +; CHECK-NEXT: [[A_NEG:%.*]] = fneg <6 x double> [[A:%.*]] |
| 21 | +; CHECK-NEXT: [[RES:%.*]] = tail call fast <2 x double> @llvm.matrix.multiply.v2f64.v6f64.v3f64(<6 x double> [[A_NEG]], <3 x double> [[B:%.*]], i32 2, i32 3, i32 1) |
| 22 | +; CHECK-NEXT: ret <2 x double> [[RES]] |
| 23 | +; |
| 24 | + %a.neg = fneg <6 x double> %a |
| 25 | + %res = tail call fast <2 x double> @llvm.matrix.multiply.v2f64.v6f64.v3f64(<6 x double> %a.neg, <3 x double> %b, i32 2, i32 3, i32 1) |
| 26 | + ret <2 x double> %res |
| 27 | +} |
| 28 | + |
| 29 | +; %b has the fewest vector elements between the result and the two operands so the negation can be moved there |
| 30 | +define <9 x double> @test_move_negation_to_second_operand(<27 x double> %a, <3 x double> %b) { |
| 31 | +; CHECK-LABEL: @test_move_negation_to_second_operand( |
| 32 | +; CHECK-NEXT: [[A_NEG:%.*]] = fneg <27 x double> [[A:%.*]] |
| 33 | +; CHECK-NEXT: [[RES:%.*]] = tail call <9 x double> @llvm.matrix.multiply.v9f64.v27f64.v3f64(<27 x double> [[A_NEG]], <3 x double> [[B:%.*]], i32 9, i32 3, i32 1) |
| 34 | +; CHECK-NEXT: ret <9 x double> [[RES]] |
| 35 | +; |
| 36 | + %a.neg = fneg <27 x double> %a |
| 37 | + %res = tail call <9 x double> @llvm.matrix.multiply.v9f64.v27f64.v3f64(<27 x double> %a.neg, <3 x double> %b, i32 9, i32 3, i32 1) |
| 38 | + ret <9 x double> %res |
| 39 | +} |
| 40 | + |
| 41 | +; %b has the fewest vector elements between the result and the two operands so the negation can be moved there |
| 42 | +; Fast flag should be preserved |
| 43 | +define <9 x double> @test_move_negation_to_second_operand_with_fast_flags(<27 x double> %a, <3 x double> %b) { |
| 44 | +; CHECK-LABEL: @test_move_negation_to_second_operand_with_fast_flags( |
| 45 | +; CHECK-NEXT: [[A_NEG:%.*]] = fneg <27 x double> [[A:%.*]] |
| 46 | +; CHECK-NEXT: [[RES:%.*]] = tail call fast <9 x double> @llvm.matrix.multiply.v9f64.v27f64.v3f64(<27 x double> [[A_NEG]], <3 x double> [[B:%.*]], i32 9, i32 3, i32 1) |
| 47 | +; CHECK-NEXT: ret <9 x double> [[RES]] |
| 48 | +; |
| 49 | + %a.neg = fneg <27 x double> %a |
| 50 | + %res = tail call fast <9 x double> @llvm.matrix.multiply.v9f64.v27f64.v3f64(<27 x double> %a.neg, <3 x double> %b, i32 9, i32 3, i32 1) |
| 51 | + ret <9 x double> %res |
| 52 | +} |
| 53 | + |
| 54 | +; The result has the fewest vector elements between the result and the two operands so the negation can be moved there |
| 55 | +define <2 x double> @test_negation_move_to_result_from_second_operand(<3 x double> %a, <6 x double> %b){ |
| 56 | +; CHECK-LABEL: @test_negation_move_to_result_from_second_operand( |
| 57 | +; CHECK-NEXT: [[B_NEG:%.*]] = fneg <6 x double> [[B:%.*]] |
| 58 | +; CHECK-NEXT: [[RES:%.*]] = tail call <2 x double> @llvm.matrix.multiply.v2f64.v3f64.v6f64(<3 x double> [[A:%.*]], <6 x double> [[B_NEG]], i32 1, i32 3, i32 2) |
| 59 | +; CHECK-NEXT: ret <2 x double> [[RES]] |
| 60 | +; |
| 61 | + %b.neg = fneg <6 x double> %b |
| 62 | + %res = tail call <2 x double> @llvm.matrix.multiply.v2f64.v3f64.v6f64(<3 x double> %a, <6 x double> %b.neg, i32 1, i32 3, i32 2) |
| 63 | + ret <2 x double> %res |
| 64 | +} |
| 65 | + |
| 66 | +; %a has the fewest vector elements between the result and the two operands so the negation can be moved there |
| 67 | +define <9 x double> @test_move_negation_to_first_operand(<3 x double> %a, <27 x double> %b) { |
| 68 | +; CHECK-LABEL: @test_move_negation_to_first_operand( |
| 69 | +; CHECK-NEXT: [[B_NEG:%.*]] = fneg <27 x double> [[B:%.*]] |
| 70 | +; CHECK-NEXT: [[RES:%.*]] = tail call <9 x double> @llvm.matrix.multiply.v9f64.v3f64.v27f64(<3 x double> [[A:%.*]], <27 x double> [[B_NEG]], i32 1, i32 3, i32 9) |
| 71 | +; CHECK-NEXT: ret <9 x double> [[RES]] |
| 72 | +; |
| 73 | + %b.neg = fneg <27 x double> %b |
| 74 | + %res = tail call <9 x double> @llvm.matrix.multiply.v9f64.v3f64.v27f64(<3 x double> %a, <27 x double> %b.neg, i32 1, i32 3, i32 9) |
| 75 | + ret <9 x double> %res |
| 76 | +} |
| 77 | + |
| 78 | +; %a has the fewest vector elements between the result and the two operands so the negation is not moved |
| 79 | +define <15 x double> @test_negation_not_moved(<3 x double> %a, <5 x double> %b) { |
| 80 | +; CHECK-LABEL: @test_negation_not_moved( |
| 81 | +; CHECK-NEXT: [[A_NEG:%.*]] = fneg <3 x double> [[A:%.*]] |
| 82 | +; CHECK-NEXT: [[RES:%.*]] = tail call <15 x double> @llvm.matrix.multiply.v15f64.v3f64.v5f64(<3 x double> [[A_NEG]], <5 x double> [[B:%.*]], i32 3, i32 1, i32 5) |
| 83 | +; CHECK-NEXT: ret <15 x double> [[RES]] |
| 84 | +; |
| 85 | + %a.neg = fneg <3 x double> %a |
| 86 | + %res = tail call <15 x double> @llvm.matrix.multiply.v15f64.v3f64.v5f64(<3 x double> %a.neg, <5 x double> %b, i32 3, i32 1, i32 5) |
| 87 | + ret <15 x double> %res |
| 88 | +} |
| 89 | + |
| 90 | +; %b has the fewest vector elements between the result and the two operands so the negation is not moved |
| 91 | +define <15 x double> @test_negation_not_moved_second_operand(<5 x double> %a, <3 x double> %b) { |
| 92 | +; CHECK-LABEL: @test_negation_not_moved_second_operand( |
| 93 | +; CHECK-NEXT: [[B_NEG:%.*]] = fneg <3 x double> [[B:%.*]] |
| 94 | +; CHECK-NEXT: [[RES:%.*]] = tail call <15 x double> @llvm.matrix.multiply.v15f64.v5f64.v3f64(<5 x double> [[A:%.*]], <3 x double> [[B_NEG]], i32 5, i32 1, i32 3) |
| 95 | +; CHECK-NEXT: ret <15 x double> [[RES]] |
| 96 | +; |
| 97 | + %b.neg = fneg <3 x double> %b |
| 98 | + %res = tail call <15 x double> @llvm.matrix.multiply.v15f64.v5f64.v3f64(<5 x double> %a, <3 x double> %b.neg, i32 5, i32 1, i32 3) |
| 99 | + ret <15 x double> %res |
| 100 | +} |
| 101 | + |
| 102 | +; the negation should be moved from the result to operand %a because it has the smallest vector element count |
| 103 | +define <15 x double> @test_negation_on_result(<3 x double> %a, <5 x double> %b) { |
| 104 | +; CHECK-LABEL: @test_negation_on_result( |
| 105 | +; CHECK-NEXT: [[RES:%.*]] = tail call <15 x double> @llvm.matrix.multiply.v15f64.v3f64.v5f64(<3 x double> [[A:%.*]], <5 x double> [[B:%.*]], i32 3, i32 1, i32 5) |
| 106 | +; CHECK-NEXT: [[RES_2:%.*]] = fneg <15 x double> [[RES]] |
| 107 | +; CHECK-NEXT: ret <15 x double> [[RES_2]] |
| 108 | +; |
| 109 | + %res = tail call <15 x double> @llvm.matrix.multiply.v15f64.v3f64.v5f64(<3 x double> %a, <5 x double> %b, i32 3, i32 1, i32 5) |
| 110 | + %res.2 = fneg <15 x double> %res |
| 111 | + ret <15 x double> %res.2 |
| 112 | +} |
| 113 | + |
| 114 | +; both negations can be deleted |
| 115 | +define <2 x double> @test_with_two_operands_negated1(<6 x double> %a, <3 x double> %b){ |
| 116 | +; CHECK-LABEL: @test_with_two_operands_negated1( |
| 117 | +; CHECK-NEXT: [[A_NEG:%.*]] = fneg <6 x double> [[A:%.*]] |
| 118 | +; CHECK-NEXT: [[B_NEG:%.*]] = fneg <3 x double> [[B:%.*]] |
| 119 | +; CHECK-NEXT: [[RES:%.*]] = tail call <2 x double> @llvm.matrix.multiply.v2f64.v6f64.v3f64(<6 x double> [[A_NEG]], <3 x double> [[B_NEG]], i32 2, i32 3, i32 1) |
| 120 | +; CHECK-NEXT: ret <2 x double> [[RES]] |
| 121 | +; |
| 122 | + %a.neg = fneg <6 x double> %a |
| 123 | + %b.neg = fneg <3 x double> %b |
| 124 | + %res = tail call <2 x double> @llvm.matrix.multiply.v2f64.v6f64.v3f64(<6 x double> %a.neg, <3 x double> %b.neg, i32 2, i32 3, i32 1) |
| 125 | + ret <2 x double> %res |
| 126 | +} |
| 127 | + |
| 128 | +; both negations will be removed |
| 129 | +define <9 x double> @test_with_two_operands_negated2(<27 x double> %a, <3 x double> %b){ |
| 130 | +; CHECK-LABEL: @test_with_two_operands_negated2( |
| 131 | +; CHECK-NEXT: [[A_NEG:%.*]] = fneg <27 x double> [[A:%.*]] |
| 132 | +; CHECK-NEXT: [[B_NEG:%.*]] = fneg <3 x double> [[B:%.*]] |
| 133 | +; CHECK-NEXT: [[RES:%.*]] = tail call <9 x double> @llvm.matrix.multiply.v9f64.v27f64.v3f64(<27 x double> [[A_NEG]], <3 x double> [[B_NEG]], i32 9, i32 3, i32 1) |
| 134 | +; CHECK-NEXT: ret <9 x double> [[RES]] |
| 135 | +; |
| 136 | + %a.neg = fneg <27 x double> %a |
| 137 | + %b.neg = fneg <3 x double> %b |
| 138 | + %res = tail call <9 x double> @llvm.matrix.multiply.v9f64.v27f64.v3f64(<27 x double> %a.neg, <3 x double> %b.neg, i32 9, i32 3, i32 1) |
| 139 | + ret <9 x double> %res |
| 140 | +} |
| 141 | + |
| 142 | +; both negations will be removed |
| 143 | +define <9 x double> @test_with_two_operands_negated_with_fastflags(<27 x double> %a, <3 x double> %b){ |
| 144 | +; CHECK-LABEL: @test_with_two_operands_negated_with_fastflags( |
| 145 | +; CHECK-NEXT: [[A_NEG:%.*]] = fneg <27 x double> [[A:%.*]] |
| 146 | +; CHECK-NEXT: [[B_NEG:%.*]] = fneg <3 x double> [[B:%.*]] |
| 147 | +; CHECK-NEXT: [[RES:%.*]] = tail call fast <9 x double> @llvm.matrix.multiply.v9f64.v27f64.v3f64(<27 x double> [[A_NEG]], <3 x double> [[B_NEG]], i32 9, i32 3, i32 1) |
| 148 | +; CHECK-NEXT: ret <9 x double> [[RES]] |
| 149 | +; |
| 150 | + %a.neg = fneg <27 x double> %a |
| 151 | + %b.neg = fneg <3 x double> %b |
| 152 | + %res = tail call fast <9 x double> @llvm.matrix.multiply.v9f64.v27f64.v3f64(<27 x double> %a.neg, <3 x double> %b.neg, i32 9, i32 3, i32 1) |
| 153 | + ret <9 x double> %res |
| 154 | +} |
| 155 | + |
| 156 | +; both negations should be removed |
| 157 | +define <9 x double> @test_with_two_operands_negated2_commute(<3 x double> %a, <27 x double> %b){ |
| 158 | +; CHECK-LABEL: @test_with_two_operands_negated2_commute( |
| 159 | +; CHECK-NEXT: [[A_NEG:%.*]] = fneg <3 x double> [[A:%.*]] |
| 160 | +; CHECK-NEXT: [[B_NEG:%.*]] = fneg <27 x double> [[B:%.*]] |
| 161 | +; CHECK-NEXT: [[RES:%.*]] = call <9 x double> @llvm.matrix.multiply.v9f64.v3f64.v27f64(<3 x double> [[A_NEG]], <27 x double> [[B_NEG]], i32 1, i32 3, i32 9) |
| 162 | +; CHECK-NEXT: ret <9 x double> [[RES]] |
| 163 | +; |
| 164 | + %a.neg = fneg <3 x double> %a |
| 165 | + %b.neg = fneg <27 x double> %b |
| 166 | + %res = call <9 x double> @llvm.matrix.multiply.v9f64.v3f64.v27f64(<3 x double> %a.neg, <27 x double> %b.neg, i32 1, i32 3, i32 9) |
| 167 | + ret <9 x double> %res |
| 168 | +} |
| 169 | + |
| 170 | +define <4 x double> @matrix_multiply_two_operands_negated_with_same_size(<2 x double> %a, <2 x double> %b) { |
| 171 | +; CHECK-LABEL: @matrix_multiply_two_operands_negated_with_same_size( |
| 172 | +; CHECK-NEXT: [[A_NEG:%.*]] = fneg <2 x double> [[A:%.*]] |
| 173 | +; CHECK-NEXT: [[B_NEG:%.*]] = fneg <2 x double> [[B:%.*]] |
| 174 | +; CHECK-NEXT: [[RES:%.*]] = call <4 x double> @llvm.matrix.multiply.v4f64.v2f64.v2f64(<2 x double> [[A_NEG]], <2 x double> [[B_NEG]], i32 2, i32 1, i32 2) |
| 175 | +; CHECK-NEXT: ret <4 x double> [[RES]] |
| 176 | +; |
| 177 | + %a.neg = fneg <2 x double> %a |
| 178 | + %b.neg = fneg <2 x double> %b |
| 179 | + %res = call <4 x double> @llvm.matrix.multiply.v4f64.v2f64.v2f64(<2 x double> %a.neg, <2 x double> %b.neg, i32 2, i32 1, i32 2) |
| 180 | + ret <4 x double> %res |
| 181 | +} |
| 182 | + |
| 183 | +define <2 x double> @matrix_multiply_two_operands_with_multiple_uses(<6 x double> %a, <3 x double> %b) { |
| 184 | +; CHECK-LABEL: @matrix_multiply_two_operands_with_multiple_uses( |
| 185 | +; CHECK-NEXT: [[A_NEG:%.*]] = fneg <6 x double> [[A:%.*]] |
| 186 | +; CHECK-NEXT: [[B_NEG:%.*]] = fneg <3 x double> [[B:%.*]] |
| 187 | +; CHECK-NEXT: [[RES:%.*]] = tail call <2 x double> @llvm.matrix.multiply.v2f64.v6f64.v3f64(<6 x double> [[A_NEG]], <3 x double> [[B_NEG]], i32 2, i32 3, i32 1) |
| 188 | +; CHECK-NEXT: [[RES_2:%.*]] = shufflevector <6 x double> [[A_NEG]], <6 x double> undef, <2 x i32> <i32 0, i32 1> |
| 189 | +; CHECK-NEXT: [[RES_3:%.*]] = fadd <2 x double> [[RES_2]], [[RES]] |
| 190 | +; CHECK-NEXT: ret <2 x double> [[RES_3]] |
| 191 | +; |
| 192 | + %a.neg = fneg <6 x double> %a |
| 193 | + %b.neg = fneg <3 x double> %b |
| 194 | + %res = tail call <2 x double> @llvm.matrix.multiply.v2f64.v6f64.v3f64(<6 x double> %a.neg, <3 x double> %b.neg, i32 2, i32 3, i32 1) |
| 195 | + %res.2 = shufflevector <6 x double> %a.neg, <6 x double> undef, |
| 196 | + <2 x i32> <i32 0, i32 1> |
| 197 | + %res.3 = fadd <2 x double> %res.2, %res |
| 198 | + ret <2 x double> %res.3 |
| 199 | +} |
| 200 | + |
| 201 | +define <9 x double> @matrix_multiply_two_operands_with_multiple_uses2(<27 x double> %a, <3 x double> %b, ptr %a_loc, ptr %b_loc){ |
| 202 | +; CHECK-LABEL: @matrix_multiply_two_operands_with_multiple_uses2( |
| 203 | +; CHECK-NEXT: [[A_NEG:%.*]] = fneg <27 x double> [[A:%.*]] |
| 204 | +; CHECK-NEXT: [[B_NEG:%.*]] = fneg <3 x double> [[B:%.*]] |
| 205 | +; CHECK-NEXT: [[RES:%.*]] = tail call <9 x double> @llvm.matrix.multiply.v9f64.v27f64.v3f64(<27 x double> [[A_NEG]], <3 x double> [[B_NEG]], i32 9, i32 3, i32 1) |
| 206 | +; CHECK-NEXT: store <27 x double> [[A_NEG]], ptr [[A_LOC:%.*]], align 256 |
| 207 | +; CHECK-NEXT: store <3 x double> [[B_NEG]], ptr [[B_LOC:%.*]], align 32 |
| 208 | +; CHECK-NEXT: ret <9 x double> [[RES]] |
| 209 | +; |
| 210 | + %a.neg = fneg <27 x double> %a |
| 211 | + %b.neg = fneg <3 x double> %b |
| 212 | + %res = tail call <9 x double> @llvm.matrix.multiply.v9f64.v27f64.v3f64(<27 x double> %a.neg, <3 x double> %b.neg, i32 9, i32 3, i32 1) |
| 213 | + store <27 x double> %a.neg, ptr %a_loc |
| 214 | + store <3 x double> %b.neg, ptr %b_loc |
| 215 | + ret <9 x double> %res |
| 216 | +} |
| 217 | + |
| 218 | +define <12 x double> @fneg_with_multiple_uses(<15 x double> %a, <20 x double> %b){ |
| 219 | +; CHECK-LABEL: @fneg_with_multiple_uses( |
| 220 | +; CHECK-NEXT: [[A_NEG:%.*]] = fneg <15 x double> [[A:%.*]] |
| 221 | +; CHECK-NEXT: [[RES:%.*]] = tail call <12 x double> @llvm.matrix.multiply.v12f64.v15f64.v20f64(<15 x double> [[A_NEG]], <20 x double> [[B:%.*]], i32 3, i32 5, i32 4) |
| 222 | +; CHECK-NEXT: [[RES_2:%.*]] = shufflevector <15 x double> [[A_NEG]], <15 x double> undef, <12 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11> |
| 223 | +; CHECK-NEXT: [[RES_3:%.*]] = fadd <12 x double> [[RES_2]], [[RES]] |
| 224 | +; CHECK-NEXT: ret <12 x double> [[RES_3]] |
| 225 | +; |
| 226 | + %a.neg = fneg <15 x double> %a |
| 227 | + %res = tail call <12 x double> @llvm.matrix.multiply.v12f64.v15f64.v20f64(<15 x double> %a.neg, <20 x double> %b, i32 3, i32 5, i32 4) |
| 228 | + %res.2 = shufflevector <15 x double> %a.neg, <15 x double> undef, |
| 229 | + <12 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11> |
| 230 | + %res.3 = fadd <12 x double> %res.2, %res |
| 231 | + ret <12 x double> %res.3 |
| 232 | +} |
| 233 | + |
| 234 | +define <12 x double> @fneg_with_multiple_uses_2(<15 x double> %a, <20 x double> %b, ptr %a_loc){ |
| 235 | +; CHECK-LABEL: @fneg_with_multiple_uses_2( |
| 236 | +; CHECK-NEXT: [[A_NEG:%.*]] = fneg <15 x double> [[A:%.*]] |
| 237 | +; CHECK-NEXT: [[RES:%.*]] = tail call <12 x double> @llvm.matrix.multiply.v12f64.v15f64.v20f64(<15 x double> [[A_NEG]], <20 x double> [[B:%.*]], i32 3, i32 5, i32 4) |
| 238 | +; CHECK-NEXT: store <15 x double> [[A_NEG]], ptr [[A_LOC:%.*]], align 128 |
| 239 | +; CHECK-NEXT: ret <12 x double> [[RES]] |
| 240 | +; |
| 241 | + %a.neg = fneg <15 x double> %a |
| 242 | + %res = tail call <12 x double> @llvm.matrix.multiply.v12f64.v15f64.v20f64(<15 x double> %a.neg, <20 x double> %b, i32 3, i32 5, i32 4) |
| 243 | + store <15 x double> %a.neg, ptr %a_loc |
| 244 | + ret <12 x double> %res |
| 245 | +} |
| 246 | +; negation should be moved to the second operand given it has the fewest vector elements |
| 247 | +define <72 x double> @chain_of_matrix_mutliplies(<27 x double> %a, <3 x double> %b, <8 x double> %c) { |
| 248 | +; CHECK-LABEL: @chain_of_matrix_mutliplies( |
| 249 | +; CHECK-NEXT: [[A_NEG:%.*]] = fneg <27 x double> [[A:%.*]] |
| 250 | +; CHECK-NEXT: [[RES:%.*]] = tail call <9 x double> @llvm.matrix.multiply.v9f64.v27f64.v3f64(<27 x double> [[A_NEG]], <3 x double> [[B:%.*]], i32 9, i32 3, i32 1) |
| 251 | +; CHECK-NEXT: [[RES_2:%.*]] = tail call <72 x double> @llvm.matrix.multiply.v72f64.v9f64.v8f64(<9 x double> [[RES]], <8 x double> [[C:%.*]], i32 9, i32 1, i32 8) |
| 252 | +; CHECK-NEXT: ret <72 x double> [[RES_2]] |
| 253 | +; |
| 254 | + %a.neg = fneg <27 x double> %a |
| 255 | + %res = tail call <9 x double> @llvm.matrix.multiply.v9f64.v27f64.v3f64(<27 x double> %a.neg, <3 x double> %b, i32 9, i32 3, i32 1) |
| 256 | + %res.2 = tail call <72 x double> @llvm.matrix.multiply.v72f64.v9f64.v8f64(<9 x double> %res, <8 x double> %c, i32 9, i32 1, i32 8) |
| 257 | + ret <72 x double> %res.2 |
| 258 | +} |
| 259 | + |
| 260 | +; first negation should be moved to %a |
| 261 | +; second negation should be moved to the result of the second multiplication |
| 262 | +define <6 x double> @chain_of_matrix_mutliplies_with_two_negations(<3 x double> %a, <5 x double> %b, <10 x double> %c) { |
| 263 | +; CHECK-LABEL: @chain_of_matrix_mutliplies_with_two_negations( |
| 264 | +; CHECK-NEXT: [[B_NEG:%.*]] = fneg <5 x double> [[B:%.*]] |
| 265 | +; CHECK-NEXT: [[RES:%.*]] = tail call <15 x double> @llvm.matrix.multiply.v15f64.v3f64.v5f64(<3 x double> [[A:%.*]], <5 x double> [[B_NEG]], i32 3, i32 1, i32 5) |
| 266 | +; CHECK-NEXT: [[RES_NEG:%.*]] = fneg <15 x double> [[RES]] |
| 267 | +; CHECK-NEXT: [[RES_2:%.*]] = tail call <6 x double> @llvm.matrix.multiply.v6f64.v15f64.v10f64(<15 x double> [[RES_NEG]], <10 x double> [[C:%.*]], i32 3, i32 5, i32 2) |
| 268 | +; CHECK-NEXT: ret <6 x double> [[RES_2]] |
| 269 | +; |
| 270 | + %b.neg = fneg <5 x double> %b |
| 271 | + %res = tail call <15 x double> @llvm.matrix.multiply.v15f64.v3f64.v5f64(<3 x double> %a, <5 x double> %b.neg, i32 3, i32 1, i32 5) |
| 272 | + %res.neg = fneg <15 x double> %res |
| 273 | + %res.2 = tail call <6 x double> @llvm.matrix.multiply.v6f64.v15f64.v10f64(<15 x double> %res.neg, <10 x double> %c, i32 3, i32 5, i32 2) |
| 274 | + ret <6 x double> %res.2 |
| 275 | +} |
| 276 | + |
| 277 | +; negation should be propagated to the result of the second matrix multiplication |
| 278 | +define <6 x double> @chain_of_matrix_mutliplies_propagation(<15 x double> %a, <20 x double> %b, <8 x double> %c){ |
| 279 | +; CHECK-LABEL: @chain_of_matrix_mutliplies_propagation( |
| 280 | +; CHECK-NEXT: [[A_NEG:%.*]] = fneg <15 x double> [[A:%.*]] |
| 281 | +; CHECK-NEXT: [[RES:%.*]] = tail call <12 x double> @llvm.matrix.multiply.v12f64.v15f64.v20f64(<15 x double> [[A_NEG]], <20 x double> [[B:%.*]], i32 3, i32 5, i32 4) |
| 282 | +; CHECK-NEXT: [[RES_2:%.*]] = tail call <6 x double> @llvm.matrix.multiply.v6f64.v12f64.v8f64(<12 x double> [[RES]], <8 x double> [[C:%.*]], i32 3, i32 4, i32 2) |
| 283 | +; CHECK-NEXT: ret <6 x double> [[RES_2]] |
| 284 | +; |
| 285 | + %a.neg = fneg <15 x double> %a |
| 286 | + %res = tail call <12 x double> @llvm.matrix.multiply.v12f64.v15f64.v20f64(<15 x double> %a.neg, <20 x double> %b, i32 3, i32 5, i32 4) |
| 287 | + %res.2 = tail call <6 x double> @llvm.matrix.multiply.v6f64.v12f64.v8f64(<12 x double> %res, <8 x double> %c, i32 3, i32 4, i32 2) |
| 288 | + ret <6 x double> %res.2 |
| 289 | +} |
| 290 | + |
| 291 | +declare <2 x double> @llvm.matrix.multiply.v2f64.v6f64.v3f64(<6 x double>, <3 x double>, i32 immarg, i32 immarg, i32 immarg) #1 |
| 292 | +declare <4 x double> @llvm.matrix.multiply.v4f64.v2f64.v2f64(<2 x double>, <2 x double>, i32 immarg, i32 immarg, i32 immarg) #1 |
| 293 | +declare <2 x double> @llvm.matrix.multiply.v2f64.v3f64.v6f64(<3 x double>, <6 x double>, i32 immarg, i32 immarg, i32 immarg) #1 |
| 294 | +declare <9 x double> @llvm.matrix.multiply.v9f64.v27f64.v3f64(<27 x double>, <3 x double>, i32 immarg, i32 immarg, i32 immarg) #1 |
| 295 | +declare <9 x double> @llvm.matrix.multiply.v9f64.v3f64.v27f64(<3 x double>, <27 x double>, i32 immarg, i32 immarg, i32 immarg) |
| 296 | +declare <15 x double> @llvm.matrix.multiply.v15f64.v3f64.v5f64(<3 x double>, <5 x double>, i32 immarg, i32 immarg, i32 immarg) #1 |
| 297 | +declare <15 x double> @llvm.matrix.multiply.v15f64.v5f64.v3f64(<5 x double>, <3 x double>, i32 immarg, i32 immarg, i32 immarg) #1 |
| 298 | +declare <72 x double> @llvm.matrix.multiply.v72f64.v9f64.v8f64(<9 x double>, <8 x double>, i32 immarg, i32 immarg, i32 immarg) #1 |
| 299 | +declare <12 x double> @llvm.matrix.multiply.v12f64.v15f64.v20f64(<15 x double>, <20 x double>, i32 immarg, i32 immarg, i32 immarg) #1 |
| 300 | +declare <21 x double> @llvm.matrix.multiply.v21f64.v15f64.v35f64(<15 x double>, <35 x double>, i32 immarg, i32 immarg, i32 immarg) #1 |
| 301 | +declare <6 x double> @llvm.matrix.multiply.v6f64.v15f64.v10f64(<15 x double>, <10 x double>, i32 immarg, i32 immarg, i32 immarg) #1 |
| 302 | +declare <6 x double> @llvm.matrix.multiply.v6f64.v12f64.v8f64(<12 x double>, <8 x double>, i32 immarg, i32 immarg, i32 immarg) #1 |
0 commit comments