Skip to content

Commit 364a5b5

Browse files
authored
Fix a bug in implementation of Smith's algorithm used in complex div. (#78330)
This patch fixes a bug in Smith's algorithm (thanks to @andykaylor who detected it) and makes sure that last option in command line rules.
1 parent b00aa1c commit 364a5b5

File tree

7 files changed

+116
-16
lines changed

7 files changed

+116
-16
lines changed

clang/include/clang/Basic/LangOptions.def

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -220,7 +220,7 @@ BENIGN_LANGOPT(NoSignedZero , 1, 0, "Permit Floating Point optimization wit
220220
BENIGN_LANGOPT(AllowRecip , 1, 0, "Permit Floating Point reciprocal")
221221
BENIGN_LANGOPT(ApproxFunc , 1, 0, "Permit Floating Point approximation")
222222

223-
ENUM_LANGOPT(ComplexRange, ComplexRangeKind, 2, CX_Full, "Enable use of range reduction for complex arithmetics.")
223+
ENUM_LANGOPT(ComplexRange, ComplexRangeKind, 2, CX_None, "Enable use of range reduction for complex arithmetics.")
224224

225225
BENIGN_LANGOPT(ObjCGCBitmapPrint , 1, 0, "printing of GC's bitmap layout for __weak/__strong ivars")
226226

clang/include/clang/Basic/LangOptions.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -414,7 +414,7 @@ class LangOptions : public LangOptionsBase {
414414
IncompleteOnly = 3,
415415
};
416416

417-
enum ComplexRangeKind { CX_Full, CX_Limited, CX_Fortran };
417+
enum ComplexRangeKind { CX_Full, CX_Limited, CX_Fortran, CX_None };
418418

419419
public:
420420
/// The used language standard.

clang/lib/CodeGen/CGExprComplex.cpp

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -892,6 +892,9 @@ ComplexPairTy ComplexExprEmitter::EmitRangeReductionDiv(llvm::Value *LHSr,
892892
llvm::Value *LHSi,
893893
llvm::Value *RHSr,
894894
llvm::Value *RHSi) {
895+
// FIXME: This could eventually be replaced by an LLVM intrinsic to
896+
// avoid this long IR sequence.
897+
895898
// (a + ib) / (c + id) = (e + if)
896899
llvm::Value *FAbsRHSr = EmitllvmFAbs(CGF, RHSr); // |c|
897900
llvm::Value *FAbsRHSi = EmitllvmFAbs(CGF, RHSi); // |d|
@@ -936,7 +939,7 @@ ComplexPairTy ComplexExprEmitter::EmitRangeReductionDiv(llvm::Value *LHSr,
936939
llvm::Value *RC = Builder.CreateFMul(CdD, RHSr); // rc
937940
llvm::Value *DpRC = Builder.CreateFAdd(RHSi, RC); // tmp=d+rc
938941

939-
llvm::Value *T7 = Builder.CreateFMul(LHSr, RC); // ar
942+
llvm::Value *T7 = Builder.CreateFMul(LHSr, CdD); // ar
940943
llvm::Value *T8 = Builder.CreateFAdd(T7, LHSi); // ar+b
941944
llvm::Value *DSTFr = Builder.CreateFDiv(T8, DpRC); // (ar+b)/tmp
942945

@@ -978,7 +981,10 @@ ComplexPairTy ComplexExprEmitter::EmitBinDiv(const BinOpInfo &Op) {
978981
return EmitRangeReductionDiv(LHSr, LHSi, RHSr, RHSi);
979982
else if (Op.FPFeatures.getComplexRange() == LangOptions::CX_Limited)
980983
return EmitAlgebraicDiv(LHSr, LHSi, RHSr, RHSi);
981-
else if (!CGF.getLangOpts().FastMath) {
984+
else if (!CGF.getLangOpts().FastMath ||
985+
// '-ffast-math' is used in the command line but followed by an
986+
// '-fno-cx-limited-range'.
987+
Op.FPFeatures.getComplexRange() == LangOptions::CX_Full) {
982988
LHSi = OrigLHSi;
983989
// If we have a complex operand on the RHS and FastMath is not allowed, we
984990
// delegate to a libcall to handle all of the complexities and minimize

clang/lib/Driver/ToolChains/Clang.cpp

Lines changed: 24 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -2712,9 +2712,22 @@ static void EmitComplexRangeDiag(const Driver &D,
27122712
<< EnumComplexRangeToStr(Range1) << EnumComplexRangeToStr(Range2);
27132713
}
27142714

2715-
static std::string RenderComplexRangeOption(std::string Range) {
2715+
static std::string
2716+
RenderComplexRangeOption(LangOptions::ComplexRangeKind Range) {
27162717
std::string ComplexRangeStr = "-complex-range=";
2717-
ComplexRangeStr += Range;
2718+
switch (Range) {
2719+
case LangOptions::ComplexRangeKind::CX_Full:
2720+
ComplexRangeStr += "full";
2721+
break;
2722+
case LangOptions::ComplexRangeKind::CX_Limited:
2723+
ComplexRangeStr += "limited";
2724+
break;
2725+
case LangOptions::ComplexRangeKind::CX_Fortran:
2726+
ComplexRangeStr += "fortran";
2727+
break;
2728+
default:
2729+
assert("Unexpected range option");
2730+
}
27182731
return ComplexRangeStr;
27192732
}
27202733

@@ -2764,7 +2777,8 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D,
27642777
bool StrictFPModel = false;
27652778
StringRef Float16ExcessPrecision = "";
27662779
StringRef BFloat16ExcessPrecision = "";
2767-
LangOptions::ComplexRangeKind Range = LangOptions::ComplexRangeKind::CX_Full;
2780+
LangOptions::ComplexRangeKind Range = LangOptions::ComplexRangeKind::CX_None;
2781+
std::string ComplexRangeStr = "";
27682782

27692783
if (const Arg *A = Args.getLastArg(options::OPT_flimited_precision_EQ)) {
27702784
CmdArgs.push_back("-mlimit-float-precision");
@@ -2780,23 +2794,19 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D,
27802794
case options::OPT_fcx_limited_range: {
27812795
EmitComplexRangeDiag(D, Range, LangOptions::ComplexRangeKind::CX_Limited);
27822796
Range = LangOptions::ComplexRangeKind::CX_Limited;
2783-
std::string ComplexRangeStr = RenderComplexRangeOption("limited");
2784-
if (!ComplexRangeStr.empty())
2785-
CmdArgs.push_back(Args.MakeArgString(ComplexRangeStr));
27862797
break;
27872798
}
27882799
case options::OPT_fno_cx_limited_range:
2800+
EmitComplexRangeDiag(D, Range, LangOptions::ComplexRangeKind::CX_Full);
27892801
Range = LangOptions::ComplexRangeKind::CX_Full;
27902802
break;
27912803
case options::OPT_fcx_fortran_rules: {
27922804
EmitComplexRangeDiag(D, Range, LangOptions::ComplexRangeKind::CX_Fortran);
27932805
Range = LangOptions::ComplexRangeKind::CX_Fortran;
2794-
std::string ComplexRangeStr = RenderComplexRangeOption("fortran");
2795-
if (!ComplexRangeStr.empty())
2796-
CmdArgs.push_back(Args.MakeArgString(ComplexRangeStr));
27972806
break;
27982807
}
27992808
case options::OPT_fno_cx_fortran_rules:
2809+
EmitComplexRangeDiag(D, Range, LangOptions::ComplexRangeKind::CX_Full);
28002810
Range = LangOptions::ComplexRangeKind::CX_Full;
28012811
break;
28022812
case options::OPT_ffp_model_EQ: {
@@ -3068,9 +3078,7 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D,
30683078
SeenUnsafeMathModeOption = true;
30693079
// ffast-math enables fortran rules for complex multiplication and
30703080
// division.
3071-
std::string ComplexRangeStr = RenderComplexRangeOption("limited");
3072-
if (!ComplexRangeStr.empty())
3073-
CmdArgs.push_back(Args.MakeArgString(ComplexRangeStr));
3081+
Range = LangOptions::ComplexRangeKind::CX_Limited;
30743082
break;
30753083
}
30763084
case options::OPT_fno_fast_math:
@@ -3227,6 +3235,10 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D,
32273235
options::OPT_fstrict_float_cast_overflow, false))
32283236
CmdArgs.push_back("-fno-strict-float-cast-overflow");
32293237

3238+
if (Range != LangOptions::ComplexRangeKind::CX_None)
3239+
ComplexRangeStr = RenderComplexRangeOption(Range);
3240+
if (!ComplexRangeStr.empty())
3241+
CmdArgs.push_back(Args.MakeArgString(ComplexRangeStr));
32303242
if (Args.hasArg(options::OPT_fcx_limited_range))
32313243
CmdArgs.push_back("-fcx-limited-range");
32323244
if (Args.hasArg(options::OPT_fcx_fortran_rules))

clang/test/CodeGen/cx-complex-range.c

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,25 @@
1515
// RUN: -ffast-math -complex-range=limited -emit-llvm -o - %s \
1616
// RUN: | FileCheck %s --check-prefix=LMTD-FAST
1717

18+
// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu \
19+
// RUN: -ffast-math -complex-range=full -emit-llvm -o - %s \
20+
// RUN: | FileCheck %s --check-prefix=FULL
21+
1822
// RUN: %clang_cc1 %s -O0 -emit-llvm -triple x86_64-unknown-unknown \
1923
// RUN: -fno-cx-fortran-rules -o - | FileCheck %s --check-prefix=FULL
2024

25+
// RUN: %clang_cc1 %s -O0 -emit-llvm -triple x86_64-unknown-unknown \
26+
// RUN: -fcx-limited-range -fno-cx-limited-range -o - \
27+
// RUN: | FileCheck %s --check-prefix=FULL
28+
29+
// RUN: %clang_cc1 %s -O0 -emit-llvm -triple x86_64-unknown-unknown \
30+
// RUN: -fno-cx-limited-range -fcx-limited-range -o - \
31+
// RUN: | FileCheck %s --check-prefix=FULL
32+
33+
// RUN: %clang_cc1 %s -O0 -emit-llvm -triple x86_64-unknown-unknown \
34+
// RUN: -fno-cx-fortran-rules -fcx-fortran-rules -o - \
35+
// RUN: | FileCheck %s --check-prefix=FULL
36+
2137
_Complex float div(_Complex float a, _Complex float b) {
2238
// LABEL: define {{.*}} @div(
2339
// FULL: call {{.*}} @__divsc3
Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4
2+
// RUN: %clang_cc1 %s -O0 -emit-llvm -triple x86_64-unknown-unknown \
3+
// RUN: -complex-range=fortran -o - | FileCheck %s --check-prefix=FRTRN
4+
5+
// FRTRN-LABEL: define dso_local <2 x float> @div(
6+
// FRTRN-SAME: <2 x float> noundef [[A_COERCE:%.*]], <2 x float> noundef [[B_COERCE:%.*]]) #[[ATTR0:[0-9]+]] {
7+
// FRTRN-NEXT: entry:
8+
// FRTRN-NEXT: [[RETVAL:%.*]] = alloca { float, float }, align 4
9+
// FRTRN-NEXT: [[A:%.*]] = alloca { float, float }, align 4
10+
// FRTRN-NEXT: [[B:%.*]] = alloca { float, float }, align 4
11+
// FRTRN-NEXT: store <2 x float> [[A_COERCE]], ptr [[A]], align 4
12+
// FRTRN-NEXT: store <2 x float> [[B_COERCE]], ptr [[B]], align 4
13+
// FRTRN-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[A]], i32 0, i32 0
14+
// FRTRN-NEXT: [[A_REAL:%.*]] = load float, ptr [[A_REALP]], align 4
15+
// FRTRN-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[A]], i32 0, i32 1
16+
// FRTRN-NEXT: [[A_IMAG:%.*]] = load float, ptr [[A_IMAGP]], align 4
17+
// FRTRN-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[B]], i32 0, i32 0
18+
// FRTRN-NEXT: [[B_REAL:%.*]] = load float, ptr [[B_REALP]], align 4
19+
// FRTRN-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[B]], i32 0, i32 1
20+
// FRTRN-NEXT: [[B_IMAG:%.*]] = load float, ptr [[B_IMAGP]], align 4
21+
// FRTRN-NEXT: [[TMP0:%.*]] = call float @llvm.fabs.f32(float [[B_REAL]])
22+
// FRTRN-NEXT: [[TMP1:%.*]] = call float @llvm.fabs.f32(float [[B_IMAG]])
23+
// FRTRN-NEXT: [[ABS_CMP:%.*]] = fcmp ugt float [[TMP0]], [[TMP1]]
24+
// FRTRN-NEXT: br i1 [[ABS_CMP]], label [[ABS_RHSR_GREATER_OR_EQUAL_ABS_RHSI:%.*]], label [[ABS_RHSR_LESS_THAN_ABS_RHSI:%.*]]
25+
// FRTRN: abs_rhsr_greater_or_equal_abs_rhsi:
26+
// FRTRN-NEXT: [[TMP2:%.*]] = fdiv float [[B_IMAG]], [[B_REAL]]
27+
// FRTRN-NEXT: [[TMP3:%.*]] = fmul float [[TMP2]], [[B_IMAG]]
28+
// FRTRN-NEXT: [[TMP4:%.*]] = fadd float [[B_REAL]], [[TMP3]]
29+
// FRTRN-NEXT: [[TMP5:%.*]] = fmul float [[A_IMAG]], [[TMP2]]
30+
// FRTRN-NEXT: [[TMP6:%.*]] = fadd float [[A_REAL]], [[TMP5]]
31+
// FRTRN-NEXT: [[TMP7:%.*]] = fdiv float [[TMP6]], [[TMP4]]
32+
// FRTRN-NEXT: [[TMP8:%.*]] = fmul float [[A_REAL]], [[TMP2]]
33+
// FRTRN-NEXT: [[TMP9:%.*]] = fsub float [[A_IMAG]], [[TMP8]]
34+
// FRTRN-NEXT: [[TMP10:%.*]] = fdiv float [[TMP9]], [[TMP4]]
35+
// FRTRN-NEXT: br label [[COMPLEX_DIV:%.*]]
36+
// FRTRN: abs_rhsr_less_than_abs_rhsi:
37+
// FRTRN-NEXT: [[TMP11:%.*]] = fdiv float [[B_REAL]], [[B_IMAG]]
38+
// FRTRN-NEXT: [[TMP12:%.*]] = fmul float [[TMP11]], [[B_REAL]]
39+
// FRTRN-NEXT: [[TMP13:%.*]] = fadd float [[B_IMAG]], [[TMP12]]
40+
// FRTRN-NEXT: [[TMP14:%.*]] = fmul float [[A_REAL]], [[TMP11]]
41+
// FRTRN-NEXT: [[TMP15:%.*]] = fadd float [[TMP14]], [[A_IMAG]]
42+
// FRTRN-NEXT: [[TMP16:%.*]] = fdiv float [[TMP15]], [[TMP13]]
43+
// FRTRN-NEXT: [[TMP17:%.*]] = fmul float [[A_IMAG]], [[TMP11]]
44+
// FRTRN-NEXT: [[TMP18:%.*]] = fsub float [[TMP17]], [[A_REAL]]
45+
// FRTRN-NEXT: [[TMP19:%.*]] = fdiv float [[TMP18]], [[TMP13]]
46+
// FRTRN-NEXT: br label [[COMPLEX_DIV]]
47+
// FRTRN: complex_div:
48+
// FRTRN-NEXT: [[TMP20:%.*]] = phi float [ [[TMP7]], [[ABS_RHSR_GREATER_OR_EQUAL_ABS_RHSI]] ], [ [[TMP16]], [[ABS_RHSR_LESS_THAN_ABS_RHSI]] ]
49+
// FRTRN-NEXT: [[TMP21:%.*]] = phi float [ [[TMP10]], [[ABS_RHSR_GREATER_OR_EQUAL_ABS_RHSI]] ], [ [[TMP19]], [[ABS_RHSR_LESS_THAN_ABS_RHSI]] ]
50+
// FRTRN-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[RETVAL]], i32 0, i32 0
51+
// FRTRN-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[RETVAL]], i32 0, i32 1
52+
// FRTRN-NEXT: store float [[TMP20]], ptr [[RETVAL_REALP]], align 4
53+
// FRTRN-NEXT: store float [[TMP21]], ptr [[RETVAL_IMAGP]], align 4
54+
// FRTRN-NEXT: [[TMP22:%.*]] = load <2 x float>, ptr [[RETVAL]], align 4
55+
// FRTRN-NEXT: ret <2 x float> [[TMP22]]
56+
//
57+
_Complex float div(_Complex float a, _Complex float b) {
58+
return a / b;
59+
}

clang/test/Driver/range.c

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,9 @@
66
// RUN: %clang -### -target x86_64 -fno-cx-limited-range -c %s 2>&1 \
77
// RUN: | FileCheck %s
88

9+
// RUN: %clang -### -target x86_64 -fcx-limited-range -fno-cx-limited-range \
10+
// RUN: -c %s 2>&1 | FileCheck --check-prefix=FULL %s
11+
912
// RUN: %clang -### -target x86_64 -fcx-fortran-rules -c %s 2>&1 \
1013
// RUN: | FileCheck --check-prefix=FRTRN %s
1114

@@ -29,7 +32,11 @@
2932
// RUN: %clang -### -target x86_64 -fcx-limited-range -ffast-math -c %s 2>&1 \
3033
// RUN: | FileCheck --check-prefix=LMTD %s
3134

35+
// RUN: %clang -### -target x86_64 -ffast-math -fno-cx-limited-range -c %s 2>&1 \
36+
// RUN: | FileCheck --check-prefix=FULL %s
37+
3238
// LMTD: -complex-range=limited
39+
// FULL: -complex-range=full
3340
// LMTD-NOT: -complex-range=fortran
3441
// CHECK-NOT: -complex-range=limited
3542
// FRTRN: -complex-range=fortran

0 commit comments

Comments
 (0)