Skip to content

Commit a251c90

Browse files
committed
fix elementwise
1 parent 60ab073 commit a251c90

File tree

2 files changed

+24
-16
lines changed

2 files changed

+24
-16
lines changed

clang/lib/CodeGen/CGBuiltin.cpp

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4415,8 +4415,12 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
44154415
? llvm::Intrinsic::smax
44164416
: llvm::Intrinsic::umax,
44174417
Op0, Op1, nullptr, "elt.max");
4418-
} else
4419-
Result = Builder.CreateMaxNum(Op0, Op1, /*FMFSource=*/nullptr, "elt.max");
4418+
} else {
4419+
FastMathFlags FMF;
4420+
FMF.setNoSignedZeros(true);
4421+
FMFSource FMFSrc(FMF);
4422+
Result = Builder.CreateMaxNum(Op0, Op1, /*FMFSource=*/FMFSrc, "elt.max");
4423+
}
44204424
return RValue::get(Result);
44214425
}
44224426
case Builtin::BI__builtin_elementwise_min: {
@@ -4431,8 +4435,12 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
44314435
? llvm::Intrinsic::smin
44324436
: llvm::Intrinsic::umin,
44334437
Op0, Op1, nullptr, "elt.min");
4434-
} else
4435-
Result = Builder.CreateMinNum(Op0, Op1, /*FMFSource=*/nullptr, "elt.min");
4438+
} else {
4439+
FastMathFlags FMF;
4440+
FMF.setNoSignedZeros(true);
4441+
FMFSource FMFSrc(FMF);
4442+
Result = Builder.CreateMinNum(Op0, Op1, /*FMFSource=*/FMFSrc, "elt.min");
4443+
}
44364444
return RValue::get(Result);
44374445
}
44384446

clang/test/CodeGen/fmaxnum_fminnum_use_nsz.c

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -158,7 +158,7 @@ float fmin32b(float a, float b) {
158158
// CHECK-NEXT: store <4 x float> [[C]], ptr [[C_ADDR]], align 16, !tbaa [[TBAA6]]
159159
// CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A_ADDR]], align 16, !tbaa [[TBAA6]]
160160
// CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[B_ADDR]], align 16, !tbaa [[TBAA6]]
161-
// CHECK-NEXT: [[ELT_MIN:%.*]] = call <4 x float> @llvm.minnum.v4f32(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
161+
// CHECK-NEXT: [[ELT_MIN:%.*]] = call nsz <4 x float> @llvm.minnum.v4f32(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
162162
// CHECK-NEXT: store <4 x float> [[ELT_MIN]], ptr [[C_ADDR]], align 16, !tbaa [[TBAA6]]
163163
// CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, ptr [[C_ADDR]], align 16, !tbaa [[TBAA6]]
164164
// CHECK-NEXT: ret <4 x float> [[TMP2]]
@@ -174,7 +174,7 @@ float fmin32b(float a, float b) {
174174
// CHECK-NO-NANS-NEXT: store <4 x float> [[C]], ptr [[C_ADDR]], align 16, !tbaa [[TBAA6]]
175175
// CHECK-NO-NANS-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A_ADDR]], align 16, !tbaa [[TBAA6]]
176176
// CHECK-NO-NANS-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[B_ADDR]], align 16, !tbaa [[TBAA6]]
177-
// CHECK-NO-NANS-NEXT: [[ELT_MIN:%.*]] = call nnan <4 x float> @llvm.minnum.v4f32(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
177+
// CHECK-NO-NANS-NEXT: [[ELT_MIN:%.*]] = call nnan nsz <4 x float> @llvm.minnum.v4f32(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
178178
// CHECK-NO-NANS-NEXT: store <4 x float> [[ELT_MIN]], ptr [[C_ADDR]], align 16, !tbaa [[TBAA6]]
179179
// CHECK-NO-NANS-NEXT: [[TMP2:%.*]] = load <4 x float>, ptr [[C_ADDR]], align 16, !tbaa [[TBAA6]]
180180
// CHECK-NO-NANS-NEXT: ret <4 x float> [[TMP2]]
@@ -190,7 +190,7 @@ float fmin32b(float a, float b) {
190190
// CHECK-NO-INFS-NEXT: store <4 x float> [[C]], ptr [[C_ADDR]], align 16, !tbaa [[TBAA6]]
191191
// CHECK-NO-INFS-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A_ADDR]], align 16, !tbaa [[TBAA6]]
192192
// CHECK-NO-INFS-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[B_ADDR]], align 16, !tbaa [[TBAA6]]
193-
// CHECK-NO-INFS-NEXT: [[ELT_MIN:%.*]] = call ninf <4 x float> @llvm.minnum.v4f32(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
193+
// CHECK-NO-INFS-NEXT: [[ELT_MIN:%.*]] = call ninf nsz <4 x float> @llvm.minnum.v4f32(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
194194
// CHECK-NO-INFS-NEXT: store <4 x float> [[ELT_MIN]], ptr [[C_ADDR]], align 16, !tbaa [[TBAA6]]
195195
// CHECK-NO-INFS-NEXT: [[TMP2:%.*]] = load <4 x float>, ptr [[C_ADDR]], align 16, !tbaa [[TBAA6]]
196196
// CHECK-NO-INFS-NEXT: ret <4 x float> [[TMP2]]
@@ -364,7 +364,7 @@ float fmin64b(double a, double b) {
364364
// CHECK-NEXT: store <2 x double> [[C]], ptr [[C_ADDR]], align 16, !tbaa [[TBAA6]]
365365
// CHECK-NEXT: [[TMP0:%.*]] = load <2 x double>, ptr [[A_ADDR]], align 16, !tbaa [[TBAA6]]
366366
// CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, ptr [[B_ADDR]], align 16, !tbaa [[TBAA6]]
367-
// CHECK-NEXT: [[ELT_MIN:%.*]] = call <2 x double> @llvm.minnum.v2f64(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
367+
// CHECK-NEXT: [[ELT_MIN:%.*]] = call nsz <2 x double> @llvm.minnum.v2f64(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
368368
// CHECK-NEXT: store <2 x double> [[ELT_MIN]], ptr [[C_ADDR]], align 16, !tbaa [[TBAA6]]
369369
// CHECK-NEXT: [[TMP2:%.*]] = load <2 x double>, ptr [[C_ADDR]], align 16, !tbaa [[TBAA6]]
370370
// CHECK-NEXT: ret <2 x double> [[TMP2]]
@@ -380,7 +380,7 @@ float fmin64b(double a, double b) {
380380
// CHECK-NO-NANS-NEXT: store <2 x double> [[C]], ptr [[C_ADDR]], align 16, !tbaa [[TBAA6]]
381381
// CHECK-NO-NANS-NEXT: [[TMP0:%.*]] = load <2 x double>, ptr [[A_ADDR]], align 16, !tbaa [[TBAA6]]
382382
// CHECK-NO-NANS-NEXT: [[TMP1:%.*]] = load <2 x double>, ptr [[B_ADDR]], align 16, !tbaa [[TBAA6]]
383-
// CHECK-NO-NANS-NEXT: [[ELT_MIN:%.*]] = call nnan <2 x double> @llvm.minnum.v2f64(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
383+
// CHECK-NO-NANS-NEXT: [[ELT_MIN:%.*]] = call nnan nsz <2 x double> @llvm.minnum.v2f64(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
384384
// CHECK-NO-NANS-NEXT: store <2 x double> [[ELT_MIN]], ptr [[C_ADDR]], align 16, !tbaa [[TBAA6]]
385385
// CHECK-NO-NANS-NEXT: [[TMP2:%.*]] = load <2 x double>, ptr [[C_ADDR]], align 16, !tbaa [[TBAA6]]
386386
// CHECK-NO-NANS-NEXT: ret <2 x double> [[TMP2]]
@@ -396,7 +396,7 @@ float fmin64b(double a, double b) {
396396
// CHECK-NO-INFS-NEXT: store <2 x double> [[C]], ptr [[C_ADDR]], align 16, !tbaa [[TBAA6]]
397397
// CHECK-NO-INFS-NEXT: [[TMP0:%.*]] = load <2 x double>, ptr [[A_ADDR]], align 16, !tbaa [[TBAA6]]
398398
// CHECK-NO-INFS-NEXT: [[TMP1:%.*]] = load <2 x double>, ptr [[B_ADDR]], align 16, !tbaa [[TBAA6]]
399-
// CHECK-NO-INFS-NEXT: [[ELT_MIN:%.*]] = call ninf <2 x double> @llvm.minnum.v2f64(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
399+
// CHECK-NO-INFS-NEXT: [[ELT_MIN:%.*]] = call ninf nsz <2 x double> @llvm.minnum.v2f64(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
400400
// CHECK-NO-INFS-NEXT: store <2 x double> [[ELT_MIN]], ptr [[C_ADDR]], align 16, !tbaa [[TBAA6]]
401401
// CHECK-NO-INFS-NEXT: [[TMP2:%.*]] = load <2 x double>, ptr [[C_ADDR]], align 16, !tbaa [[TBAA6]]
402402
// CHECK-NO-INFS-NEXT: ret <2 x double> [[TMP2]]
@@ -686,7 +686,7 @@ float fmax32b(float a, float b) {
686686
// CHECK-NEXT: store <4 x float> [[C]], ptr [[C_ADDR]], align 16, !tbaa [[TBAA6]]
687687
// CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A_ADDR]], align 16, !tbaa [[TBAA6]]
688688
// CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[B_ADDR]], align 16, !tbaa [[TBAA6]]
689-
// CHECK-NEXT: [[ELT_MAX:%.*]] = call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
689+
// CHECK-NEXT: [[ELT_MAX:%.*]] = call nsz <4 x float> @llvm.maxnum.v4f32(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
690690
// CHECK-NEXT: store <4 x float> [[ELT_MAX]], ptr [[C_ADDR]], align 16, !tbaa [[TBAA6]]
691691
// CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, ptr [[C_ADDR]], align 16, !tbaa [[TBAA6]]
692692
// CHECK-NEXT: ret <4 x float> [[TMP2]]
@@ -702,7 +702,7 @@ float fmax32b(float a, float b) {
702702
// CHECK-NO-NANS-NEXT: store <4 x float> [[C]], ptr [[C_ADDR]], align 16, !tbaa [[TBAA6]]
703703
// CHECK-NO-NANS-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A_ADDR]], align 16, !tbaa [[TBAA6]]
704704
// CHECK-NO-NANS-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[B_ADDR]], align 16, !tbaa [[TBAA6]]
705-
// CHECK-NO-NANS-NEXT: [[ELT_MAX:%.*]] = call nnan <4 x float> @llvm.maxnum.v4f32(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
705+
// CHECK-NO-NANS-NEXT: [[ELT_MAX:%.*]] = call nnan nsz <4 x float> @llvm.maxnum.v4f32(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
706706
// CHECK-NO-NANS-NEXT: store <4 x float> [[ELT_MAX]], ptr [[C_ADDR]], align 16, !tbaa [[TBAA6]]
707707
// CHECK-NO-NANS-NEXT: [[TMP2:%.*]] = load <4 x float>, ptr [[C_ADDR]], align 16, !tbaa [[TBAA6]]
708708
// CHECK-NO-NANS-NEXT: ret <4 x float> [[TMP2]]
@@ -718,7 +718,7 @@ float fmax32b(float a, float b) {
718718
// CHECK-NO-INFS-NEXT: store <4 x float> [[C]], ptr [[C_ADDR]], align 16, !tbaa [[TBAA6]]
719719
// CHECK-NO-INFS-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A_ADDR]], align 16, !tbaa [[TBAA6]]
720720
// CHECK-NO-INFS-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[B_ADDR]], align 16, !tbaa [[TBAA6]]
721-
// CHECK-NO-INFS-NEXT: [[ELT_MAX:%.*]] = call ninf <4 x float> @llvm.maxnum.v4f32(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
721+
// CHECK-NO-INFS-NEXT: [[ELT_MAX:%.*]] = call ninf nsz <4 x float> @llvm.maxnum.v4f32(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
722722
// CHECK-NO-INFS-NEXT: store <4 x float> [[ELT_MAX]], ptr [[C_ADDR]], align 16, !tbaa [[TBAA6]]
723723
// CHECK-NO-INFS-NEXT: [[TMP2:%.*]] = load <4 x float>, ptr [[C_ADDR]], align 16, !tbaa [[TBAA6]]
724724
// CHECK-NO-INFS-NEXT: ret <4 x float> [[TMP2]]
@@ -892,7 +892,7 @@ float fmax64b(double a, double b) {
892892
// CHECK-NEXT: store <2 x double> [[C]], ptr [[C_ADDR]], align 16, !tbaa [[TBAA6]]
893893
// CHECK-NEXT: [[TMP0:%.*]] = load <2 x double>, ptr [[A_ADDR]], align 16, !tbaa [[TBAA6]]
894894
// CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, ptr [[B_ADDR]], align 16, !tbaa [[TBAA6]]
895-
// CHECK-NEXT: [[ELT_MAX:%.*]] = call <2 x double> @llvm.maxnum.v2f64(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
895+
// CHECK-NEXT: [[ELT_MAX:%.*]] = call nsz <2 x double> @llvm.maxnum.v2f64(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
896896
// CHECK-NEXT: store <2 x double> [[ELT_MAX]], ptr [[C_ADDR]], align 16, !tbaa [[TBAA6]]
897897
// CHECK-NEXT: [[TMP2:%.*]] = load <2 x double>, ptr [[C_ADDR]], align 16, !tbaa [[TBAA6]]
898898
// CHECK-NEXT: ret <2 x double> [[TMP2]]
@@ -908,7 +908,7 @@ float fmax64b(double a, double b) {
908908
// CHECK-NO-NANS-NEXT: store <2 x double> [[C]], ptr [[C_ADDR]], align 16, !tbaa [[TBAA6]]
909909
// CHECK-NO-NANS-NEXT: [[TMP0:%.*]] = load <2 x double>, ptr [[A_ADDR]], align 16, !tbaa [[TBAA6]]
910910
// CHECK-NO-NANS-NEXT: [[TMP1:%.*]] = load <2 x double>, ptr [[B_ADDR]], align 16, !tbaa [[TBAA6]]
911-
// CHECK-NO-NANS-NEXT: [[ELT_MAX:%.*]] = call nnan <2 x double> @llvm.maxnum.v2f64(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
911+
// CHECK-NO-NANS-NEXT: [[ELT_MAX:%.*]] = call nnan nsz <2 x double> @llvm.maxnum.v2f64(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
912912
// CHECK-NO-NANS-NEXT: store <2 x double> [[ELT_MAX]], ptr [[C_ADDR]], align 16, !tbaa [[TBAA6]]
913913
// CHECK-NO-NANS-NEXT: [[TMP2:%.*]] = load <2 x double>, ptr [[C_ADDR]], align 16, !tbaa [[TBAA6]]
914914
// CHECK-NO-NANS-NEXT: ret <2 x double> [[TMP2]]
@@ -924,7 +924,7 @@ float fmax64b(double a, double b) {
924924
// CHECK-NO-INFS-NEXT: store <2 x double> [[C]], ptr [[C_ADDR]], align 16, !tbaa [[TBAA6]]
925925
// CHECK-NO-INFS-NEXT: [[TMP0:%.*]] = load <2 x double>, ptr [[A_ADDR]], align 16, !tbaa [[TBAA6]]
926926
// CHECK-NO-INFS-NEXT: [[TMP1:%.*]] = load <2 x double>, ptr [[B_ADDR]], align 16, !tbaa [[TBAA6]]
927-
// CHECK-NO-INFS-NEXT: [[ELT_MAX:%.*]] = call ninf <2 x double> @llvm.maxnum.v2f64(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
927+
// CHECK-NO-INFS-NEXT: [[ELT_MAX:%.*]] = call ninf nsz <2 x double> @llvm.maxnum.v2f64(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
928928
// CHECK-NO-INFS-NEXT: store <2 x double> [[ELT_MAX]], ptr [[C_ADDR]], align 16, !tbaa [[TBAA6]]
929929
// CHECK-NO-INFS-NEXT: [[TMP2:%.*]] = load <2 x double>, ptr [[C_ADDR]], align 16, !tbaa [[TBAA6]]
930930
// CHECK-NO-INFS-NEXT: ret <2 x double> [[TMP2]]

0 commit comments

Comments
 (0)