Skip to content

Commit 6be0e97

Browse files
authored
[flang] Add fastmath attributes to complex arithmetic (#70690)
Propagate fast-math flags through complex-number lowering (when lowering the fir.*c operations — fir.addc, fir.subc, fir.mulc and fir.divc — directly to LLVM floating-point operations). The lowering path through the MLIR complex dialect is unchanged. This leads to a small performance improvement in the SPEC CPU 2017 benchmark fotonik3d_r.
1 parent 1adb898 commit 6be0e97

File tree

7 files changed: 77 additions and 59 deletions.

flang/include/flang/Optimizer/Dialect/FIROps.td

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2538,12 +2538,18 @@ def fir_NegcOp : ComplexUnaryArithmeticOp<"negc">;
25382538

25392539
class ComplexArithmeticOp<string mnemonic, list<Trait> traits = []> :
25402540
fir_ArithmeticOp<mnemonic, traits>,
2541-
Arguments<(ins fir_ComplexType:$lhs, fir_ComplexType:$rhs)>;
2542-
2543-
def fir_AddcOp : ComplexArithmeticOp<"addc", [Commutative]>;
2544-
def fir_SubcOp : ComplexArithmeticOp<"subc">;
2545-
def fir_MulcOp : ComplexArithmeticOp<"mulc", [Commutative]>;
2546-
def fir_DivcOp : ComplexArithmeticOp<"divc">;
2541+
Arguments<(ins fir_ComplexType:$lhs, fir_ComplexType:$rhs,
2542+
DefaultValuedAttr<Arith_FastMathAttr,
2543+
"::mlir::arith::FastMathFlags::none">:$fastmath)>;
2544+
2545+
def fir_AddcOp : ComplexArithmeticOp<"addc",
2546+
[Commutative, DeclareOpInterfaceMethods<ArithFastMathInterface>]>;
2547+
def fir_SubcOp : ComplexArithmeticOp<"subc",
2548+
[DeclareOpInterfaceMethods<ArithFastMathInterface>]>;
2549+
def fir_MulcOp : ComplexArithmeticOp<"mulc",
2550+
[Commutative, DeclareOpInterfaceMethods<ArithFastMathInterface>]>;
2551+
def fir_DivcOp : ComplexArithmeticOp<"divc",
2552+
[DeclareOpInterfaceMethods<ArithFastMathInterface>]>;
25472553
// Pow is a builtin call and not a primitive
25482554

25492555
def fir_CmpcOp : fir_Op<"cmpc",

flang/lib/Optimizer/CodeGen/CodeGen.cpp

Lines changed: 31 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
#include "mlir/Conversion/OpenMPToLLVM/ConvertOpenMPToLLVM.h"
3434
#include "mlir/Conversion/ReconcileUnrealizedCasts/ReconcileUnrealizedCasts.h"
3535
#include "mlir/Conversion/VectorToLLVM/ConvertVectorToLLVM.h"
36+
#include "mlir/Dialect/Arith/IR/Arith.h"
3637
#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
3738
#include "mlir/Dialect/LLVMIR/Transforms/AddComdats.h"
3839
#include "mlir/Dialect/OpenACC/OpenACC.h"
@@ -44,6 +45,7 @@
4445
#include "mlir/Target/LLVMIR/ModuleTranslation.h"
4546
#include "llvm/ADT/ArrayRef.h"
4647
#include "llvm/ADT/TypeSwitch.h"
48+
#include <mlir/Dialect/LLVMIR/LLVMAttrs.h>
4749

4850
namespace fir {
4951
#define GEN_PASS_DEF_FIRTOLLVMLOWERING
@@ -3496,12 +3498,20 @@ struct AbsentOpConversion : public FIROpConversion<fir::AbsentOp> {
34963498
// Primitive operations on Complex types
34973499
//
34983500

3501+
template <typename OPTY>
3502+
static inline mlir::LLVM::FastmathFlagsAttr getLLVMFMFAttr(OPTY op) {
3503+
return mlir::LLVM::FastmathFlagsAttr::get(
3504+
op.getContext(),
3505+
mlir::arith::convertArithFastMathFlagsToLLVM(op.getFastmath()));
3506+
}
3507+
34993508
/// Generate inline code for complex addition/subtraction
35003509
template <typename LLVMOP, typename OPTY>
35013510
static mlir::LLVM::InsertValueOp
35023511
complexSum(OPTY sumop, mlir::ValueRange opnds,
35033512
mlir::ConversionPatternRewriter &rewriter,
35043513
const fir::LLVMTypeConverter &lowering) {
3514+
mlir::LLVM::FastmathFlagsAttr fmf = getLLVMFMFAttr(sumop);
35053515
mlir::Value a = opnds[0];
35063516
mlir::Value b = opnds[1];
35073517
auto loc = sumop.getLoc();
@@ -3511,8 +3521,8 @@ complexSum(OPTY sumop, mlir::ValueRange opnds,
35113521
auto y0 = rewriter.create<mlir::LLVM::ExtractValueOp>(loc, a, 1);
35123522
auto x1 = rewriter.create<mlir::LLVM::ExtractValueOp>(loc, b, 0);
35133523
auto y1 = rewriter.create<mlir::LLVM::ExtractValueOp>(loc, b, 1);
3514-
auto rx = rewriter.create<LLVMOP>(loc, eleTy, x0, x1);
3515-
auto ry = rewriter.create<LLVMOP>(loc, eleTy, y0, y1);
3524+
auto rx = rewriter.create<LLVMOP>(loc, eleTy, x0, x1, fmf);
3525+
auto ry = rewriter.create<LLVMOP>(loc, eleTy, y0, y1, fmf);
35163526
auto r0 = rewriter.create<mlir::LLVM::UndefOp>(loc, ty);
35173527
auto r1 = rewriter.create<mlir::LLVM::InsertValueOp>(loc, r0, rx, 0);
35183528
return rewriter.create<mlir::LLVM::InsertValueOp>(loc, r1, ry, 1);
@@ -3560,6 +3570,7 @@ struct MulcOpConversion : public FIROpConversion<fir::MulcOp> {
35603570
// TODO: Can we use a call to __muldc3 ?
35613571
// given: (x + iy) * (x' + iy')
35623572
// result: (xx'-yy')+i(xy'+yx')
3573+
mlir::LLVM::FastmathFlagsAttr fmf = getLLVMFMFAttr(mulc);
35633574
mlir::Value a = adaptor.getOperands()[0];
35643575
mlir::Value b = adaptor.getOperands()[1];
35653576
auto loc = mulc.getLoc();
@@ -3569,12 +3580,12 @@ struct MulcOpConversion : public FIROpConversion<fir::MulcOp> {
35693580
auto y0 = rewriter.create<mlir::LLVM::ExtractValueOp>(loc, a, 1);
35703581
auto x1 = rewriter.create<mlir::LLVM::ExtractValueOp>(loc, b, 0);
35713582
auto y1 = rewriter.create<mlir::LLVM::ExtractValueOp>(loc, b, 1);
3572-
auto xx = rewriter.create<mlir::LLVM::FMulOp>(loc, eleTy, x0, x1);
3573-
auto yx = rewriter.create<mlir::LLVM::FMulOp>(loc, eleTy, y0, x1);
3574-
auto xy = rewriter.create<mlir::LLVM::FMulOp>(loc, eleTy, x0, y1);
3575-
auto ri = rewriter.create<mlir::LLVM::FAddOp>(loc, eleTy, xy, yx);
3576-
auto yy = rewriter.create<mlir::LLVM::FMulOp>(loc, eleTy, y0, y1);
3577-
auto rr = rewriter.create<mlir::LLVM::FSubOp>(loc, eleTy, xx, yy);
3583+
auto xx = rewriter.create<mlir::LLVM::FMulOp>(loc, eleTy, x0, x1, fmf);
3584+
auto yx = rewriter.create<mlir::LLVM::FMulOp>(loc, eleTy, y0, x1, fmf);
3585+
auto xy = rewriter.create<mlir::LLVM::FMulOp>(loc, eleTy, x0, y1, fmf);
3586+
auto ri = rewriter.create<mlir::LLVM::FAddOp>(loc, eleTy, xy, yx, fmf);
3587+
auto yy = rewriter.create<mlir::LLVM::FMulOp>(loc, eleTy, y0, y1, fmf);
3588+
auto rr = rewriter.create<mlir::LLVM::FSubOp>(loc, eleTy, xx, yy, fmf);
35783589
auto ra = rewriter.create<mlir::LLVM::UndefOp>(loc, ty);
35793590
auto r1 = rewriter.create<mlir::LLVM::InsertValueOp>(loc, ra, rr, 0);
35803591
auto r0 = rewriter.create<mlir::LLVM::InsertValueOp>(loc, r1, ri, 1);
@@ -3594,6 +3605,7 @@ struct DivcOpConversion : public FIROpConversion<fir::DivcOp> {
35943605
// Just generate inline code for now.
35953606
// given: (x + iy) / (x' + iy')
35963607
// result: ((xx'+yy')/d) + i((yx'-xy')/d) where d = x'x' + y'y'
3608+
mlir::LLVM::FastmathFlagsAttr fmf = getLLVMFMFAttr(divc);
35973609
mlir::Value a = adaptor.getOperands()[0];
35983610
mlir::Value b = adaptor.getOperands()[1];
35993611
auto loc = divc.getLoc();
@@ -3603,17 +3615,17 @@ struct DivcOpConversion : public FIROpConversion<fir::DivcOp> {
36033615
auto y0 = rewriter.create<mlir::LLVM::ExtractValueOp>(loc, a, 1);
36043616
auto x1 = rewriter.create<mlir::LLVM::ExtractValueOp>(loc, b, 0);
36053617
auto y1 = rewriter.create<mlir::LLVM::ExtractValueOp>(loc, b, 1);
3606-
auto xx = rewriter.create<mlir::LLVM::FMulOp>(loc, eleTy, x0, x1);
3607-
auto x1x1 = rewriter.create<mlir::LLVM::FMulOp>(loc, eleTy, x1, x1);
3608-
auto yx = rewriter.create<mlir::LLVM::FMulOp>(loc, eleTy, y0, x1);
3609-
auto xy = rewriter.create<mlir::LLVM::FMulOp>(loc, eleTy, x0, y1);
3610-
auto yy = rewriter.create<mlir::LLVM::FMulOp>(loc, eleTy, y0, y1);
3611-
auto y1y1 = rewriter.create<mlir::LLVM::FMulOp>(loc, eleTy, y1, y1);
3612-
auto d = rewriter.create<mlir::LLVM::FAddOp>(loc, eleTy, x1x1, y1y1);
3613-
auto rrn = rewriter.create<mlir::LLVM::FAddOp>(loc, eleTy, xx, yy);
3614-
auto rin = rewriter.create<mlir::LLVM::FSubOp>(loc, eleTy, yx, xy);
3615-
auto rr = rewriter.create<mlir::LLVM::FDivOp>(loc, eleTy, rrn, d);
3616-
auto ri = rewriter.create<mlir::LLVM::FDivOp>(loc, eleTy, rin, d);
3618+
auto xx = rewriter.create<mlir::LLVM::FMulOp>(loc, eleTy, x0, x1, fmf);
3619+
auto x1x1 = rewriter.create<mlir::LLVM::FMulOp>(loc, eleTy, x1, x1, fmf);
3620+
auto yx = rewriter.create<mlir::LLVM::FMulOp>(loc, eleTy, y0, x1, fmf);
3621+
auto xy = rewriter.create<mlir::LLVM::FMulOp>(loc, eleTy, x0, y1, fmf);
3622+
auto yy = rewriter.create<mlir::LLVM::FMulOp>(loc, eleTy, y0, y1, fmf);
3623+
auto y1y1 = rewriter.create<mlir::LLVM::FMulOp>(loc, eleTy, y1, y1, fmf);
3624+
auto d = rewriter.create<mlir::LLVM::FAddOp>(loc, eleTy, x1x1, y1y1, fmf);
3625+
auto rrn = rewriter.create<mlir::LLVM::FAddOp>(loc, eleTy, xx, yy, fmf);
3626+
auto rin = rewriter.create<mlir::LLVM::FSubOp>(loc, eleTy, yx, xy, fmf);
3627+
auto rr = rewriter.create<mlir::LLVM::FDivOp>(loc, eleTy, rrn, d, fmf);
3628+
auto ri = rewriter.create<mlir::LLVM::FDivOp>(loc, eleTy, rin, d, fmf);
36173629
auto ra = rewriter.create<mlir::LLVM::UndefOp>(loc, ty);
36183630
auto r1 = rewriter.create<mlir::LLVM::InsertValueOp>(loc, ra, rr, 0);
36193631
auto r0 = rewriter.create<mlir::LLVM::InsertValueOp>(loc, r1, ri, 1);

flang/test/Fir/convert-to-llvm.fir

Lines changed: 25 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -507,7 +507,7 @@ func.func @test_call_return_val() -> i32 {
507507
// result: (x + x') + i(y + y')
508508

509509
func.func @fir_complex_add(%a: !fir.complex<16>, %b: !fir.complex<16>) -> !fir.complex<16> {
510-
%c = fir.addc %a, %b : !fir.complex<16>
510+
%c = fir.addc %a, %b {fastmath = #arith.fastmath<fast>} : !fir.complex<16>
511511
return %c : !fir.complex<16>
512512
}
513513

@@ -518,8 +518,8 @@ func.func @fir_complex_add(%a: !fir.complex<16>, %b: !fir.complex<16>) -> !fir.c
518518
// CHECK: %[[Y0:.*]] = llvm.extractvalue %[[ARG0]][1] : !llvm.struct<(f128, f128)>
519519
// CHECK: %[[X1:.*]] = llvm.extractvalue %[[ARG1]][0] : !llvm.struct<(f128, f128)>
520520
// CHECK: %[[Y1:.*]] = llvm.extractvalue %[[ARG1]][1] : !llvm.struct<(f128, f128)>
521-
// CHECK: %[[ADD_X0_X1:.*]] = llvm.fadd %[[X0]], %[[X1]] : f128
522-
// CHECK: %[[ADD_Y0_Y1:.*]] = llvm.fadd %[[Y0]], %[[Y1]] : f128
521+
// CHECK: %[[ADD_X0_X1:.*]] = llvm.fadd %[[X0]], %[[X1]] {fastmathFlags = #llvm.fastmath<fast>} : f128
522+
// CHECK: %[[ADD_Y0_Y1:.*]] = llvm.fadd %[[Y0]], %[[Y1]] {fastmathFlags = #llvm.fastmath<fast>} : f128
523523
// CHECK: %{{.*}} = llvm.mlir.undef : !llvm.struct<(f128, f128)>
524524
// CHECK: %{{.*}} = llvm.insertvalue %[[ADD_X0_X1]], %{{.*}}[0] : !llvm.struct<(f128, f128)>
525525
// CHECK: %{{.*}} = llvm.insertvalue %[[ADD_Y0_Y1]], %{{.*}}[1] : !llvm.struct<(f128, f128)>
@@ -532,7 +532,7 @@ func.func @fir_complex_add(%a: !fir.complex<16>, %b: !fir.complex<16>) -> !fir.c
532532
// result: (x - x') + i(y - y')
533533

534534
func.func @fir_complex_sub(%a: !fir.complex<16>, %b: !fir.complex<16>) -> !fir.complex<16> {
535-
%c = fir.subc %a, %b : !fir.complex<16>
535+
%c = fir.subc %a, %b {fastmath = #arith.fastmath<fast>} : !fir.complex<16>
536536
return %c : !fir.complex<16>
537537
}
538538

@@ -543,8 +543,8 @@ func.func @fir_complex_sub(%a: !fir.complex<16>, %b: !fir.complex<16>) -> !fir.c
543543
// CHECK: %[[Y0:.*]] = llvm.extractvalue %[[ARG0]][1] : !llvm.struct<(f128, f128)>
544544
// CHECK: %[[X1:.*]] = llvm.extractvalue %[[ARG1]][0] : !llvm.struct<(f128, f128)>
545545
// CHECK: %[[Y1:.*]] = llvm.extractvalue %[[ARG1]][1] : !llvm.struct<(f128, f128)>
546-
// CHECK: %[[SUB_X0_X1:.*]] = llvm.fsub %[[X0]], %[[X1]] : f128
547-
// CHECK: %[[SUB_Y0_Y1:.*]] = llvm.fsub %[[Y0]], %[[Y1]] : f128
546+
// CHECK: %[[SUB_X0_X1:.*]] = llvm.fsub %[[X0]], %[[X1]] {fastmathFlags = #llvm.fastmath<fast>} : f128
547+
// CHECK: %[[SUB_Y0_Y1:.*]] = llvm.fsub %[[Y0]], %[[Y1]] {fastmathFlags = #llvm.fastmath<fast>} : f128
548548
// CHECK: %{{.*}} = llvm.mlir.undef : !llvm.struct<(f128, f128)>
549549
// CHECK: %{{.*}} = llvm.insertvalue %[[SUB_X0_X1]], %{{.*}}[0] : !llvm.struct<(f128, f128)>
550550
// CHECK: %{{.*}} = llvm.insertvalue %[[SUB_Y0_Y1]], %{{.*}}[1] : !llvm.struct<(f128, f128)>
@@ -557,7 +557,7 @@ func.func @fir_complex_sub(%a: !fir.complex<16>, %b: !fir.complex<16>) -> !fir.c
557557
// result: (xx'-yy')+i(xy'+yx')
558558

559559
func.func @fir_complex_mul(%a: !fir.complex<16>, %b: !fir.complex<16>) -> !fir.complex<16> {
560-
%c = fir.mulc %a, %b : !fir.complex<16>
560+
%c = fir.mulc %a, %b {fastmath = #arith.fastmath<fast>} : !fir.complex<16>
561561
return %c : !fir.complex<16>
562562
}
563563

@@ -568,12 +568,12 @@ func.func @fir_complex_mul(%a: !fir.complex<16>, %b: !fir.complex<16>) -> !fir.c
568568
// CHECK: %[[Y0:.*]] = llvm.extractvalue %[[ARG0]][1] : !llvm.struct<(f128, f128)>
569569
// CHECK: %[[X1:.*]] = llvm.extractvalue %[[ARG1]][0] : !llvm.struct<(f128, f128)>
570570
// CHECK: %[[Y1:.*]] = llvm.extractvalue %[[ARG1]][1] : !llvm.struct<(f128, f128)>
571-
// CHECK: %[[MUL_X0_X1:.*]] = llvm.fmul %[[X0]], %[[X1]] : f128
572-
// CHECK: %[[MUL_Y0_X1:.*]] = llvm.fmul %[[Y0]], %[[X1]] : f128
573-
// CHECK: %[[MUL_X0_Y1:.*]] = llvm.fmul %[[X0]], %[[Y1]] : f128
574-
// CHECK: %[[ADD:.*]] = llvm.fadd %[[MUL_X0_Y1]], %[[MUL_Y0_X1]] : f128
575-
// CHECK: %[[MUL_Y0_Y1:.*]] = llvm.fmul %[[Y0]], %[[Y1]] : f128
576-
// CHECK: %[[SUB:.*]] = llvm.fsub %[[MUL_X0_X1]], %[[MUL_Y0_Y1]] : f128
571+
// CHECK: %[[MUL_X0_X1:.*]] = llvm.fmul %[[X0]], %[[X1]] {fastmathFlags = #llvm.fastmath<fast>} : f128
572+
// CHECK: %[[MUL_Y0_X1:.*]] = llvm.fmul %[[Y0]], %[[X1]] {fastmathFlags = #llvm.fastmath<fast>} : f128
573+
// CHECK: %[[MUL_X0_Y1:.*]] = llvm.fmul %[[X0]], %[[Y1]] {fastmathFlags = #llvm.fastmath<fast>} : f128
574+
// CHECK: %[[ADD:.*]] = llvm.fadd %[[MUL_X0_Y1]], %[[MUL_Y0_X1]] {fastmathFlags = #llvm.fastmath<fast>} : f128
575+
// CHECK: %[[MUL_Y0_Y1:.*]] = llvm.fmul %[[Y0]], %[[Y1]] {fastmathFlags = #llvm.fastmath<fast>} : f128
576+
// CHECK: %[[SUB:.*]] = llvm.fsub %[[MUL_X0_X1]], %[[MUL_Y0_Y1]] {fastmathFlags = #llvm.fastmath<fast>} : f128
577577
// CHECK: %{{.*}} = llvm.mlir.undef : !llvm.struct<(f128, f128)>
578578
// CHECK: %{{.*}} = llvm.insertvalue %[[SUB]], %{{.*}}[0] : !llvm.struct<(f128, f128)>
579579
// CHECK: %{{.*}} = llvm.insertvalue %[[ADD]], %{{.*}}[1] : !llvm.struct<(f128, f128)>
@@ -586,7 +586,7 @@ func.func @fir_complex_mul(%a: !fir.complex<16>, %b: !fir.complex<16>) -> !fir.c
586586
// result: ((xx'+yy')/d) + i((yx'-xy')/d) where d = x'x' + y'y'
587587

588588
func.func @fir_complex_div(%a: !fir.complex<16>, %b: !fir.complex<16>) -> !fir.complex<16> {
589-
%c = fir.divc %a, %b : !fir.complex<16>
589+
%c = fir.divc %a, %b {fastmath = #arith.fastmath<fast>} : !fir.complex<16>
590590
return %c : !fir.complex<16>
591591
}
592592

@@ -597,17 +597,17 @@ func.func @fir_complex_div(%a: !fir.complex<16>, %b: !fir.complex<16>) -> !fir.c
597597
// CHECK: %[[Y0:.*]] = llvm.extractvalue %[[ARG0]][1] : !llvm.struct<(f128, f128)>
598598
// CHECK: %[[X1:.*]] = llvm.extractvalue %[[ARG1]][0] : !llvm.struct<(f128, f128)>
599599
// CHECK: %[[Y1:.*]] = llvm.extractvalue %[[ARG1]][1] : !llvm.struct<(f128, f128)>
600-
// CHECK: %[[MUL_X0_X1:.*]] = llvm.fmul %[[X0]], %[[X1]] : f128
601-
// CHECK: %[[MUL_X1_X1:.*]] = llvm.fmul %[[X1]], %[[X1]] : f128
602-
// CHECK: %[[MUL_Y0_X1:.*]] = llvm.fmul %[[Y0]], %[[X1]] : f128
603-
// CHECK: %[[MUL_X0_Y1:.*]] = llvm.fmul %[[X0]], %[[Y1]] : f128
604-
// CHECK: %[[MUL_Y0_Y1:.*]] = llvm.fmul %[[Y0]], %[[Y1]] : f128
605-
// CHECK: %[[MUL_Y1_Y1:.*]] = llvm.fmul %[[Y1]], %[[Y1]] : f128
606-
// CHECK: %[[ADD_X1X1_Y1Y1:.*]] = llvm.fadd %[[MUL_X1_X1]], %[[MUL_Y1_Y1]] : f128
607-
// CHECK: %[[ADD_X0X1_Y0Y1:.*]] = llvm.fadd %[[MUL_X0_X1]], %[[MUL_Y0_Y1]] : f128
608-
// CHECK: %[[SUB_Y0X1_X0Y1:.*]] = llvm.fsub %[[MUL_Y0_X1]], %[[MUL_X0_Y1]] : f128
609-
// CHECK: %[[DIV0:.*]] = llvm.fdiv %[[ADD_X0X1_Y0Y1]], %[[ADD_X1X1_Y1Y1]] : f128
610-
// CHECK: %[[DIV1:.*]] = llvm.fdiv %[[SUB_Y0X1_X0Y1]], %[[ADD_X1X1_Y1Y1]] : f128
600+
// CHECK: %[[MUL_X0_X1:.*]] = llvm.fmul %[[X0]], %[[X1]] {fastmathFlags = #llvm.fastmath<fast>} : f128
601+
// CHECK: %[[MUL_X1_X1:.*]] = llvm.fmul %[[X1]], %[[X1]] {fastmathFlags = #llvm.fastmath<fast>} : f128
602+
// CHECK: %[[MUL_Y0_X1:.*]] = llvm.fmul %[[Y0]], %[[X1]] {fastmathFlags = #llvm.fastmath<fast>} : f128
603+
// CHECK: %[[MUL_X0_Y1:.*]] = llvm.fmul %[[X0]], %[[Y1]] {fastmathFlags = #llvm.fastmath<fast>} : f128
604+
// CHECK: %[[MUL_Y0_Y1:.*]] = llvm.fmul %[[Y0]], %[[Y1]] {fastmathFlags = #llvm.fastmath<fast>} : f128
605+
// CHECK: %[[MUL_Y1_Y1:.*]] = llvm.fmul %[[Y1]], %[[Y1]] {fastmathFlags = #llvm.fastmath<fast>} : f128
606+
// CHECK: %[[ADD_X1X1_Y1Y1:.*]] = llvm.fadd %[[MUL_X1_X1]], %[[MUL_Y1_Y1]] {fastmathFlags = #llvm.fastmath<fast>} : f128
607+
// CHECK: %[[ADD_X0X1_Y0Y1:.*]] = llvm.fadd %[[MUL_X0_X1]], %[[MUL_Y0_Y1]] {fastmathFlags = #llvm.fastmath<fast>} : f128
608+
// CHECK: %[[SUB_Y0X1_X0Y1:.*]] = llvm.fsub %[[MUL_Y0_X1]], %[[MUL_X0_Y1]] {fastmathFlags = #llvm.fastmath<fast>} : f128
609+
// CHECK: %[[DIV0:.*]] = llvm.fdiv %[[ADD_X0X1_Y0Y1]], %[[ADD_X1X1_Y1Y1]] {fastmathFlags = #llvm.fastmath<fast>} : f128
610+
// CHECK: %[[DIV1:.*]] = llvm.fdiv %[[SUB_Y0X1_X0Y1]], %[[ADD_X1X1_Y1Y1]] {fastmathFlags = #llvm.fastmath<fast>} : f128
611611
// CHECK: %{{.*}} = llvm.mlir.undef : !llvm.struct<(f128, f128)>
612612
// CHECK: %{{.*}} = llvm.insertvalue %[[DIV0]], %{{.*}}[0] : !llvm.struct<(f128, f128)>
613613
// CHECK: %{{.*}} = llvm.insertvalue %[[DIV1]], %{{.*}}[1] : !llvm.struct<(f128, f128)>

flang/test/Lower/HLFIR/binary-ops.f90

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ subroutine complex_add(x, y, z)
3232
! CHECK: %[[VAL_5:.*]]:2 = hlfir.declare %{{.*}}z"} : (!fir.ref<!fir.complex<4>>) -> (!fir.ref<!fir.complex<4>>, !fir.ref<!fir.complex<4>>)
3333
! CHECK: %[[VAL_6:.*]] = fir.load %[[VAL_4]]#0 : !fir.ref<!fir.complex<4>>
3434
! CHECK: %[[VAL_7:.*]] = fir.load %[[VAL_5]]#0 : !fir.ref<!fir.complex<4>>
35-
! CHECK: %[[VAL_8:.*]] = fir.addc %[[VAL_6]], %[[VAL_7]] : !fir.complex<4>
35+
! CHECK: %[[VAL_8:.*]] = fir.addc %[[VAL_6]], %[[VAL_7]] {fastmath = #arith.fastmath<contract>} : !fir.complex<4>
3636

3737
subroutine int_sub(x, y, z)
3838
integer :: x, y, z
@@ -65,7 +65,7 @@ subroutine complex_sub(x, y, z)
6565
! CHECK: %[[VAL_5:.*]]:2 = hlfir.declare %{{.*}}z"} : (!fir.ref<!fir.complex<4>>) -> (!fir.ref<!fir.complex<4>>, !fir.ref<!fir.complex<4>>)
6666
! CHECK: %[[VAL_6:.*]] = fir.load %[[VAL_4]]#0 : !fir.ref<!fir.complex<4>>
6767
! CHECK: %[[VAL_7:.*]] = fir.load %[[VAL_5]]#0 : !fir.ref<!fir.complex<4>>
68-
! CHECK: %[[VAL_8:.*]] = fir.subc %[[VAL_6]], %[[VAL_7]] : !fir.complex<4>
68+
! CHECK: %[[VAL_8:.*]] = fir.subc %[[VAL_6]], %[[VAL_7]] {fastmath = #arith.fastmath<contract>} : !fir.complex<4>
6969

7070
subroutine int_mul(x, y, z)
7171
integer :: x, y, z
@@ -98,7 +98,7 @@ subroutine complex_mul(x, y, z)
9898
! CHECK: %[[VAL_5:.*]]:2 = hlfir.declare %{{.*}}z"} : (!fir.ref<!fir.complex<4>>) -> (!fir.ref<!fir.complex<4>>, !fir.ref<!fir.complex<4>>)
9999
! CHECK: %[[VAL_6:.*]] = fir.load %[[VAL_4]]#0 : !fir.ref<!fir.complex<4>>
100100
! CHECK: %[[VAL_7:.*]] = fir.load %[[VAL_5]]#0 : !fir.ref<!fir.complex<4>>
101-
! CHECK: %[[VAL_8:.*]] = fir.mulc %[[VAL_6]], %[[VAL_7]] : !fir.complex<4>
101+
! CHECK: %[[VAL_8:.*]] = fir.mulc %[[VAL_6]], %[[VAL_7]] {fastmath = #arith.fastmath<contract>} : !fir.complex<4>
102102

103103
subroutine int_div(x, y, z)
104104
integer :: x, y, z

flang/test/Lower/OpenACC/acc-reduction.f90

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -163,7 +163,7 @@
163163
! CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref<!fir.complex<4>>, %[[ARG1:.*]]: !fir.ref<!fir.complex<4>>):
164164
! CHECK: %[[LOAD0:.*]] = fir.load %[[ARG0]] : !fir.ref<!fir.complex<4>>
165165
! CHECK: %[[LOAD1:.*]] = fir.load %[[ARG1]] : !fir.ref<!fir.complex<4>>
166-
! CHECK: %[[COMBINED:.*]] = fir.mulc %[[LOAD0]], %[[LOAD1]] : !fir.complex<4>
166+
! CHECK: %[[COMBINED:.*]] = fir.mulc %[[LOAD0]], %[[LOAD1]] {fastmath = #arith.fastmath<contract>} : !fir.complex<4>
167167
! CHECK: fir.store %[[COMBINED]] to %[[ARG0]] : !fir.ref<!fir.complex<4>>
168168
! CHECK: acc.yield %[[ARG0]] : !fir.ref<!fir.complex<4>>
169169
! CHECK: }
@@ -183,7 +183,7 @@
183183
! CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref<!fir.complex<4>>, %[[ARG1:.*]]: !fir.ref<!fir.complex<4>>):
184184
! CHECK: %[[LOAD0:.*]] = fir.load %[[ARG0]] : !fir.ref<!fir.complex<4>>
185185
! CHECK: %[[LOAD1:.*]] = fir.load %[[ARG1]] : !fir.ref<!fir.complex<4>>
186-
! CHECK: %[[COMBINED:.*]] = fir.addc %[[LOAD0]], %[[LOAD1]] : !fir.complex<4>
186+
! CHECK: %[[COMBINED:.*]] = fir.addc %[[LOAD0]], %[[LOAD1]] {fastmath = #arith.fastmath<contract>} : !fir.complex<4>
187187
! CHECK: fir.store %[[COMBINED]] to %[[ARG0]] : !fir.ref<!fir.complex<4>>
188188
! CHECK: acc.yield %[[ARG0]] : !fir.ref<!fir.complex<4>>
189189
! CHECK: }

flang/test/Lower/array-elemental-calls-2.f90

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -144,7 +144,7 @@ subroutine check_cmplx_part()
144144
! CHECK: %[[VAL_13:.*]] = fir.load %{{.*}} : !fir.ref<!fir.complex<8>>
145145
! CHECK: fir.do_loop
146146
! CHECK: %[[VAL_23:.*]] = fir.array_fetch %{{.*}}, %{{.*}} : (!fir.array<10x!fir.complex<8>>, index) -> !fir.complex<8>
147-
! CHECK: %[[VAL_24:.*]] = fir.addc %[[VAL_23]], %[[VAL_13]] : !fir.complex<8>
147+
! CHECK: %[[VAL_24:.*]] = fir.addc %[[VAL_23]], %[[VAL_13]] {fastmath = #arith.fastmath<contract>} : !fir.complex<8>
148148
! CHECK: %[[VAL_25:.*]] = fir.extract_value %[[VAL_24]], [1 : index] : (!fir.complex<8>) -> f64
149149
! CHECK: fir.call @_QPelem_func_real(%[[VAL_25]]) {{.*}}: (f64) -> i32
150150
end subroutine

0 commit comments

Comments
 (0)