Skip to content

Commit c987f9d

Browse files
author
Francis Visoiu Mistrih
committed
[Matrix] Try to emit fmuladd for both vector and matrix types
For `vector * scalar + vector`, clang already emits `fmuladd` directly. This change enables the same for `matrix * scalar + matrix`. rdar://113967122 Differential Revision: https://reviews.llvm.org/D158883
1 parent e7bd436 commit c987f9d

File tree

2 files changed

+123
-12
lines changed

2 files changed

+123
-12
lines changed

clang/lib/CodeGen/CGExprScalar.cpp

Lines changed: 16 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3874,6 +3874,14 @@ Value *ScalarExprEmitter::EmitAdd(const BinOpInfo &op) {
38743874
}
38753875
}
38763876

3877+
// For vector and matrix adds, try to fold into an fmuladd.
3878+
if (op.LHS->getType()->isFPOrFPVectorTy()) {
3879+
CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, op.FPFeatures);
3880+
// Try to form an fmuladd.
3881+
if (Value *FMulAdd = tryEmitFMulAdd(op, CGF, Builder))
3882+
return FMulAdd;
3883+
}
3884+
38773885
if (op.Ty->isConstantMatrixType()) {
38783886
llvm::MatrixBuilder MB(Builder);
38793887
CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, op.FPFeatures);
@@ -3887,10 +3895,6 @@ Value *ScalarExprEmitter::EmitAdd(const BinOpInfo &op) {
38873895

38883896
if (op.LHS->getType()->isFPOrFPVectorTy()) {
38893897
CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, op.FPFeatures);
3890-
// Try to form an fmuladd.
3891-
if (Value *FMulAdd = tryEmitFMulAdd(op, CGF, Builder))
3892-
return FMulAdd;
3893-
38943898
return Builder.CreateFAdd(op.LHS, op.RHS, "add");
38953899
}
38963900

@@ -4024,6 +4028,14 @@ Value *ScalarExprEmitter::EmitSub(const BinOpInfo &op) {
40244028
}
40254029
}
40264030

4031+
// For vector and matrix subs, try to fold into an fmuladd.
4032+
if (op.LHS->getType()->isFPOrFPVectorTy()) {
4033+
CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, op.FPFeatures);
4034+
// Try to form an fmuladd.
4035+
if (Value *FMulAdd = tryEmitFMulAdd(op, CGF, Builder, true))
4036+
return FMulAdd;
4037+
}
4038+
40274039
if (op.Ty->isConstantMatrixType()) {
40284040
llvm::MatrixBuilder MB(Builder);
40294041
CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, op.FPFeatures);
@@ -4037,9 +4049,6 @@ Value *ScalarExprEmitter::EmitSub(const BinOpInfo &op) {
40374049

40384050
if (op.LHS->getType()->isFPOrFPVectorTy()) {
40394051
CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, op.FPFeatures);
4040-
// Try to form an fmuladd.
4041-
if (Value *FMulAdd = tryEmitFMulAdd(op, CGF, Builder, true))
4042-
return FMulAdd;
40434052
return Builder.CreateFSub(op.LHS, op.RHS, "sub");
40444053
}
40454054

clang/test/CodeGen/ffp-model.c

Lines changed: 107 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,18 @@
11
// REQUIRES: x86-registered-target
2-
// RUN: %clang -S -emit-llvm -ffp-model=fast -emit-llvm %s -o - \
2+
// RUN: %clang -S -emit-llvm -fenable-matrix -ffp-model=fast %s -o - \
33
// RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-FAST
44

5-
// RUN: %clang -S -emit-llvm -ffp-model=precise %s -o - \
5+
// RUN: %clang -S -emit-llvm -fenable-matrix -ffp-model=precise %s -o - \
66
// RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-PRECISE
77

8-
// RUN: %clang -S -emit-llvm -ffp-model=strict %s -o - \
8+
// RUN: %clang -S -emit-llvm -fenable-matrix -ffp-model=strict %s -o - \
99
// RUN: -target x86_64 | FileCheck %s --check-prefixes=CHECK,CHECK-STRICT
1010

11-
// RUN: %clang -S -emit-llvm -ffp-model=strict -ffast-math \
11+
// RUN: %clang -S -emit-llvm -fenable-matrix -ffp-model=strict -ffast-math \
1212
// RUN: -target x86_64 %s -o - | FileCheck %s \
1313
// RUN: --check-prefixes CHECK,CHECK-STRICT-FAST
1414

15-
// RUN: %clang -S -emit-llvm -ffp-model=precise -ffast-math \
15+
// RUN: %clang -S -emit-llvm -fenable-matrix -ffp-model=precise -ffast-math \
1616
// RUN: %s -o - | FileCheck %s --check-prefixes CHECK,CHECK-FAST1
1717

1818
float mymuladd(float x, float y, float z) {
@@ -46,3 +46,105 @@ float mymuladd(float x, float y, float z) {
4646
// CHECK-FAST1: load float, ptr {{.*}}
4747
// CHECK-FAST1: fadd fast float {{.*}}, {{.*}}
4848
}
49+
50+
typedef float __attribute__((ext_vector_type(2))) v2f;
51+
52+
v2f my_vec_muladd(v2f x, float y, v2f z) {
53+
// CHECK: define{{.*}} @my_vec_muladd
54+
return x * y + z;
55+
56+
// CHECK-FAST: fmul fast <2 x float>
57+
// CHECK-FAST: load <2 x float>, ptr
58+
// CHECK-FAST: fadd fast <2 x float>
59+
60+
// CHECK-PRECISE: load <2 x float>, ptr
61+
// CHECK-PRECISE: load float, ptr
62+
// CHECK-PRECISE: load <2 x float>, ptr
63+
// CHECK-PRECISE: call <2 x float> @llvm.fmuladd.v2f32(<2 x float> {{.*}}, <2 x float> {{.*}}, <2 x float> {{.*}})
64+
65+
// CHECK-STRICT: load <2 x float>, ptr
66+
// CHECK-STRICT: load float, ptr
67+
// CHECK-STRICT: call <2 x float> @llvm.experimental.constrained.fmul.v2f32(<2 x float> {{.*}}, <2 x float> {{.*}}, {{.*}})
68+
// CHECK-STRICT: load <2 x float>, ptr
69+
// CHECK-STRICT: call <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float> {{.*}}, <2 x float> {{.*}}, {{.*}})
70+
71+
// CHECK-STRICT-FAST: load <2 x float>, ptr
72+
// CHECK-STRICT-FAST: load float, ptr
73+
// CHECK-STRICT-FAST: fmul fast <2 x float> {{.*}}, {{.*}}
74+
// CHECK-STRICT-FAST: load <2 x float>, ptr
75+
// CHECK-STRICT-FAST: fadd fast <2 x float> {{.*}}, {{.*}}
76+
77+
// CHECK-FAST1: load <2 x float>, ptr
78+
// CHECK-FAST1: load float, ptr
79+
// CHECK-FAST1: fmul fast <2 x float> {{.*}}, {{.*}}
80+
// CHECK-FAST1: load <2 x float>, ptr {{.*}}
81+
// CHECK-FAST1: fadd fast <2 x float> {{.*}}, {{.*}}
82+
}
83+
84+
typedef float __attribute__((matrix_type(2, 1))) m21f;
85+
86+
m21f my_m21_muladd(m21f x, float y, m21f z) {
87+
// CHECK: define{{.*}} <2 x float> @my_m21_muladd
88+
return x * y + z;
89+
90+
// CHECK-FAST: fmul fast <2 x float>
91+
// CHECK-FAST: load <2 x float>, ptr
92+
// CHECK-FAST: fadd fast <2 x float>
93+
94+
// CHECK-PRECISE: load <2 x float>, ptr
95+
// CHECK-PRECISE: load float, ptr
96+
// CHECK-PRECISE: load <2 x float>, ptr
97+
// CHECK-PRECISE: call <2 x float> @llvm.fmuladd.v2f32(<2 x float> {{.*}}, <2 x float> {{.*}}, <2 x float> {{.*}})
98+
99+
// CHECK-STRICT: load <2 x float>, ptr
100+
// CHECK-STRICT: load float, ptr
101+
// CHECK-STRICT: call <2 x float> @llvm.experimental.constrained.fmul.v2f32(<2 x float> {{.*}}, <2 x float> {{.*}}, {{.*}})
102+
// CHECK-STRICT: load <2 x float>, ptr
103+
// CHECK-STRICT: call <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float> {{.*}}, <2 x float> {{.*}}, {{.*}})
104+
105+
// CHECK-STRICT-FAST: load <2 x float>, ptr
106+
// CHECK-STRICT-FAST: load float, ptr
107+
// CHECK-STRICT-FAST: fmul fast <2 x float> {{.*}}, {{.*}}
108+
// CHECK-STRICT-FAST: load <2 x float>, ptr
109+
// CHECK-STRICT-FAST: fadd fast <2 x float> {{.*}}, {{.*}}
110+
111+
// CHECK-FAST1: load <2 x float>, ptr
112+
// CHECK-FAST1: load float, ptr
113+
// CHECK-FAST1: fmul fast <2 x float> {{.*}}, {{.*}}
114+
// CHECK-FAST1: load <2 x float>, ptr {{.*}}
115+
// CHECK-FAST1: fadd fast <2 x float> {{.*}}, {{.*}}
116+
}
117+
118+
typedef float __attribute__((matrix_type(2, 2))) m22f;
119+
120+
m22f my_m22_muladd(m22f x, float y, m22f z) {
121+
// CHECK: define{{.*}} <4 x float> @my_m22_muladd
122+
return x * y + z;
123+
124+
// CHECK-FAST: fmul fast <4 x float>
125+
// CHECK-FAST: load <4 x float>, ptr
126+
// CHECK-FAST: fadd fast <4 x float>
127+
128+
// CHECK-PRECISE: load <4 x float>, ptr
129+
// CHECK-PRECISE: load float, ptr
130+
// CHECK-PRECISE: load <4 x float>, ptr
131+
// CHECK-PRECISE: call <4 x float> @llvm.fmuladd.v4f32(<4 x float> {{.*}}, <4 x float> {{.*}}, <4 x float> {{.*}})
132+
133+
// CHECK-STRICT: load <4 x float>, ptr
134+
// CHECK-STRICT: load float, ptr
135+
// CHECK-STRICT: call <4 x float> @llvm.experimental.constrained.fmul.v4f32(<4 x float> {{.*}}, <4 x float> {{.*}}, {{.*}})
136+
// CHECK-STRICT: load <4 x float>, ptr
137+
// CHECK-STRICT: call <4 x float> @llvm.experimental.constrained.fadd.v4f32(<4 x float> {{.*}}, <4 x float> {{.*}}, {{.*}})
138+
139+
// CHECK-STRICT-FAST: load <4 x float>, ptr
140+
// CHECK-STRICT-FAST: load float, ptr
141+
// CHECK-STRICT-FAST: fmul fast <4 x float> {{.*}}, {{.*}}
142+
// CHECK-STRICT-FAST: load <4 x float>, ptr
143+
// CHECK-STRICT-FAST: fadd fast <4 x float> {{.*}}, {{.*}}
144+
145+
// CHECK-FAST1: load <4 x float>, ptr
146+
// CHECK-FAST1: load float, ptr
147+
// CHECK-FAST1: fmul fast <4 x float> {{.*}}, {{.*}}
148+
// CHECK-FAST1: load <4 x float>, ptr {{.*}}
149+
// CHECK-FAST1: fadd fast <4 x float> {{.*}}, {{.*}}
150+
}

0 commit comments

Comments
 (0)