llvm · jcranmer-intel · Oct 10, 2023 · Oct 10, 2023 · Oct 10, 2023 · Oct 25, 2023
diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
@@ -18448,6 +18448,177 @@ will be on any later loop iteration.
 This intrinsic will only return 0 if the input count is also 0. A non-zero input
 count will produce a non-zero result.
 
+Complex Intrinsics
+------------------
+
+Complex numbers are currently represented, for intrinsic purposes, as vectors of
+floating-point numbers. A scalar complex type is represented using the type
+``<2 x floatty>``, with index ``0`` corresponding to the real part of the number
+and index ``1`` corresponding the imaginary part of the number. A vector complex
+type can be represented by an even-length vector of floating-point numbers,
+with even indices (``0``, ``2``, etc.) corresponding to real parts of numbers
+and the indices one larger (``1``, ``3``, etc.) the corresponding imaginary
+parts.
+
+The precise semantics of these intrinsics depends on the value of the
+``complex-range`` attribute provided as a call-site attribute. This attribute
+takes on three possible values:
+
+``"full"``
+  The semantics has the full expansion as given in Annex G of the C
+  specification. In general, this means it needs to be expanded using the call
+  to the appropriate routine in compiler-rt (e.g., __mulsc3).
+
+``"no-nan"``
+  This code is permitted to allow complex infinities to be represented as NaNs
+  instead, as if the code for the appropriate routine were compiled in a manner
+  that allowed ``isnan(x)`` or ``isinf(x)`` to be optimized as false.
+
+``"limited"``
+  The semantics are equivalent to the naive arithmetic expansion operations
+  (specific expansion is detailed for each arithmetic expression).
+
+When this attribute is not present, it is presumed to be ``"full"`` if no
+fast-math flags are set, and ``"no-nan"`` if ``nnan`` or ``ninf`` flags are
+present.
+
+Fast-math flags are additionally relevant for these intrinsics, particularly in
+the case of ``complex-range=limited`` variants, as those will be likely to be
+expanded in code generation and fast-math flags will propagate to the expanded
+IR in such circumstances.
+
+Intrinsics for complex addition and subtraction are not provided, as these are
+equivalent to ``fadd`` and ``fsub`` instructions, respectively.
+
+'``llvm.experimental.complex.fmul.*``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+This is an overloaded intrinsic.
+
+::
+
+      declare <2 x float> @llvm.experimental.complex.fmul.v2f32(<2 x float> <op1>, <2 x float> <op2>)
+      declare <2 x double> @llvm.experimental.complex.fmul.v2f64(<2 x double> <op1>, <2 x double> <op2>)
+      declare <4 x float> @llvm.experimental.complex.fmul.v4f32(<4 x float> <op1>, <4 x float> <op2>)
+
+Overview:
+"""""""""
+
+The '``llvm.experimental.complex.fmul``' intrinsic returns the product of its
+two operands.
+
+Arguments:
+""""""""""
+
+The arguments to the '``llvm.experimental.complex.fmul``' intrinsic must be a
+:ref:`vector <t_vector>` of :ref:`floating-point <t_floating>` types of length
+divisible by 2.
+
+Semantics:
+""""""""""
+
+The value produced is the complex product of the two inputs.
+
+If the value of ``complex-range`` attribute is ``no-nan`` or ``limited``, or if
+the ``noinf`` or ``nonan`` fast math flags are provided, the output may be
+equivalent to the following code:
+
+.. code-block:: llvm
+
+      declare <2 x float> limited_complex_mul(<2 x float> %op1, <2 x float> %op2) {
+        %x = extractelement <2 x float> %op1, i32 0 ; real of %op1
+        %y = extractelement <2 x float> %op1, i32 1 ; imag of %op1
+        %u = extractelement <2 x float> %op2, i32 0 ; real of %op2
+        %v = extractelement <2 x float> %op2, i32 1 ; imag of %op2
+        %xu = fmul float %x, %u
+        %yv = fmul float %y, %v
+        %yu = fmul float %y, %u
+        %xv = fmul float %x, %v
+        %out_real = fsub float %xu, %yv
+        %out_imag = fadd float %yu, %xv
+        %ret.0 = insertelement <2 x float> undef, i32 0, %out_real
+        %ret.1 = insertelement <2 x float> %ret.0, i32 1, %out_imag
+        return <2 x float> %ret.1
+      }
+
+When the ``complex-range`` attribute is set to ``full`` or is missing, the above
+code is insufficient to handle the result. Instead, code must be added to
+check for infinities if either the real or imaginary component of the result is
+a NaN value.
+
+
+'``llvm.experimental.complex.fdiv.*``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+This is an overloaded intrinsic.
+
+::
+
+      declare <2 x float> @llvm.experimental.complex.fdiv.v2f32(<2 x float> <op1>, <2 x float> <op2>)
+      declare <2 x double> @llvm.experimental.complex.fdiv.v2f64(<2 x double> <op1>, <2 x double> <op2>)
+      declare <4 x float> @llvm.experimental.complex.fdiv.v4f32(<4 x float> <op1>, <4 x float> <op2>)
+
+Overview:
+"""""""""
+
+The '``llvm.experimental.complex.fdiv``' intrinsic returns the quotient of its
+two operands.
+
+Arguments:
+""""""""""
+
+The arguments to the '``llvm.experimental.complex.fdiv``' intrinsic must be a
+:ref:`vector <t_vector>` of :ref:`floating-point <t_floating>` types of length
+divisible by 2.
+
+Semantics:
+""""""""""
+
+The value produced is the complex quotient of the two inputs.
+
+If the ``complex-range`` attribute is set to ``limited``, the output will be
+equivalent to the following code:
+
+.. code-block:: llvm
+
+      declare <2 x float> limited_complex_div(<2 x float> %op1, <2 x float> %op2) {
+        %x = extractelement <2 x float> %op1, i32 0 ; real of %op1
+        %y = extractelement <2 x float> %op1, i32 1 ; imag of %op1
+        %u = extractelement <2 x float> %op2, i32 0 ; real of %op2
+        %v = extractelement <2 x float> %op2, i32 1 ; imag of %op2
+        %xu = fmul float %x, %u
+        %yv = fmul float %y, %v
+        %yu = fmul float %y, %u
+        %xv = fmul float %x, %v
+        %uu = fmul float %u, %u
+        %vv = fmul float %v, %v
+        %unscaled_real = fadd float %xu, %yv
+        %unscaled_imag = fsub float %yu, %xv
+        %scale = fadd float %uu, %vv
+        %out_real = fdiv float %unscaled_real, %scale
+        %out_imag = fdiv float %unscaled_imag, %scale
+        %ret.0 = insertelement <2 x float> undef, i32 0, %out_real
+        %ret.1 = insertelement <2 x float> %ret.0, i32 1, %out_imag
+        return <2 x float> %ret.1
+      }
+
+If the ``complex-range`` attribute is set to ``no-nan`` (or the ``nnan`` or
+``ninf`` flags are specified), an additional range reduction step is necessary.
+
+If the ``complex-range`` attribute is set to ``full``, or is missing entirely,
+then an additional check is necessary after the computation that is necessary
+to recover infinites that are instead represented as NaN values.
+
+Note that when ``complex-range`` is set to ``limited``, and the code is being
+expanded to the IR provided above, the fast-math flags are duplicated onto the
+expanded code. In particular, the ``arcp`` fast math flag may also be useful, as
+it will permit the divisions to be replaced with multiplications with a
+reciprocal instead.
+
 Matrix Intrinsics
 -----------------
 

diff --git a/llvm/include/llvm/CodeGen/ExpandComplex.h b/llvm/include/llvm/CodeGen/ExpandComplex.h
@@ -0,0 +1,22 @@
+//===---- ExpandComplex.h - Expand experimental complex intrinsics --------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_EXPANDCOMPLEX_H
+#define LLVM_CODEGEN_EXPANDCOMPLEX_H
+
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+
+class ExpandComplexPass : public PassInfoMixin<ExpandComplexPass> {
+public:
+  PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+};
+} // end namespace llvm
+
+#endif // LLVM_CODEGEN_EXPANDCOMPLEX_H
diff --git a/llvm/include/llvm/CodeGen/ISDOpcodes.h b/llvm/include/llvm/CodeGen/ISDOpcodes.h
@@ -1371,6 +1371,9 @@ enum NodeType {
   // Outputs: [rv], output chain, glue
   PATCHPOINT,
 
+  /// COMPLEX_FMUL - Do a naive complex floating-point multiplication.
+  COMPLEX_FMUL,
+
 // Vector Predication
 #define BEGIN_REGISTER_VP_SDNODE(VPSDID, ...) VPSDID,
 #include "llvm/IR/VPIntrinsics.def"

diff --git a/llvm/include/llvm/CodeGen/Passes.h b/llvm/include/llvm/CodeGen/Passes.h
@@ -506,6 +506,12 @@ namespace llvm {
   /// printing assembly.
   ModulePass *createMachineOutlinerPass(bool RunOnAllFunctions = true);
 
+  /// This pass expands the experimental complex intrinsics into regular
+  /// floating-point arithmetic or calls to __mulsc3 (or similar) functions.
+  FunctionPass *createExpandComplexPass();
+
+  /// This pass expands the experimental reduction intrinsics into sequences of
+  /// shuffles.
   /// This pass expands the reduction intrinsics into sequences of shuffles.
   FunctionPass *createExpandReductionsPass();
 

diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -676,6 +676,24 @@ class TargetLoweringBase {
     return false;
   }
 
+  /// Enum that specifies how a C complex type is lowered (in LLVM type terms).
+  enum class ComplexABI {
+    Memory,  ///< Indicates that a pointer to the struct is passed.
+    Vector,  ///< Indicates that T _Complex can be passed as <2 x T>.
+    Struct,  ///< Indicates that T _Complex can be passed as {T, T}.
+    Integer, ///< Indicates that an integer of the same size is passed.
+  };
+
+  /// Returns how a C complex type is lowered when used as the return value.
+  virtual ComplexABI getComplexReturnABI(Type *ScalarFloatTy) const {
+    return ComplexABI::Struct;
+  }
+
+  /// Returns true if the target can match the @llvm.experimental.complex.fmul
+  /// intrinsic with the given type. Such an intrinsic is assumed will only be
+  /// matched when "complex-range" is "limited" or "no-nan".
+  virtual bool CustomLowerComplexMultiply(Type *FloatTy) const { return false; }
+
   /// Return if the target supports combining a
   /// chain like:
   /// \code
@@ -2783,6 +2801,7 @@ class TargetLoweringBase {
     case ISD::AVGCEILU:
     case ISD::ABDS:
     case ISD::ABDU:
+    case ISD::COMPLEX_FMUL:
       return true;
     default: return false;
     }

diff --git a/llvm/include/llvm/IR/IRBuilder.h b/llvm/include/llvm/IR/IRBuilder.h
@@ -1762,6 +1762,43 @@ class IRBuilderBase {
   Value *CreateNAryOp(unsigned Opc, ArrayRef<Value *> Ops,
                       const Twine &Name = "", MDNode *FPMathTag = nullptr);
 
+  /// Construct a complex value out of a pair of real and imaginary values.
+  /// The resulting value will be a vector, with lane 0 being the real value and
+  /// lane 1 being the complex value.
+  /// Either the \p Real or \p Imag parameter may be null, if the input is a
+  /// pure real or pure imaginary number.
+  Value *CreateComplexValue(Value *Real, Value *Imag, const Twine &Name = "") {
+    Type *ScalarTy = (Real ? Real : Imag)->getType();
+    assert(ScalarTy->isFloatingPointTy() &&
+           "Only floating-point types may be complex values.");
+    Type *ComplexTy = FixedVectorType::get(ScalarTy, 2);
+    Value *Base = PoisonValue::get(ComplexTy);
+    if (Real)
+      Base = CreateInsertElement(Base, Real, uint64_t(0), Name);
+    if (Imag)
+      Base = CreateInsertElement(Base, Imag, uint64_t(1), Name);
+    return Base;
+  }
+
+  /// Construct a complex multiply operation, setting fast-math flags and the
+  /// complex-range attribute as appropriate.
+  Value *CreateComplexMul(Value *L, Value *R, bool CxLimitedRange,
+                          const Twine &Name = "");
+
+  /// Construct a complex divide operation, setting fast-math flags and the
+  /// complex-range attribute as appropriate.
+  /// The complex-range attribute is set from the \p IgnoreNaNs and
+  /// \p DisableScaling as follows:
+  ///
+  /// \p IgnoreNans | \p DisableScaling | complex-range value
+  /// ------------- | ----------------- | -------------------
+  /// false         | false             | full
+  /// false         | true              | (illegal combination)
+  /// true          | false             | no-nan
+  /// true          | true              | limited
+  Value *CreateComplexDiv(Value *L, Value *R, bool IgnoreNaNs,
+                          bool DisableScaling = false, const Twine &Name = "");
+
   //===--------------------------------------------------------------------===//
   // Instruction creation methods: Memory Instructions
   //===--------------------------------------------------------------------===//

diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
@@ -2350,6 +2350,16 @@ let IntrProperties = [IntrNoMem, IntrSpeculatable] in {
                                          [llvm_anyvector_ty]>;
 }
 
+//===----- Complex math intrinsics ----------------------------------------===//
+
+def int_experimental_complex_fmul: DefaultAttrsIntrinsic<[llvm_anyvector_ty],
+                                            [LLVMMatchType<0>,LLVMMatchType<0>],
+                                            [IntrNoMem]>;
+
+def int_experimental_complex_fdiv: DefaultAttrsIntrinsic<[llvm_anyvector_ty],
+                                            [LLVMMatchType<0>,LLVMMatchType<0>],
+                                            [IntrNoMem]>;
+
 //===----- Matrix intrinsics ---------------------------------------------===//
 
 def int_matrix_transpose

diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h
@@ -111,6 +111,7 @@ void initializeEdgeBundlesPass(PassRegistry&);
 void initializeEHContGuardCatchretPass(PassRegistry &);
 void initializeExpandLargeFpConvertLegacyPassPass(PassRegistry&);
 void initializeExpandLargeDivRemLegacyPassPass(PassRegistry&);
+void initializeExpandComplexPass(PassRegistry &);
 void initializeExpandMemCmpPassPass(PassRegistry&);
 void initializeExpandPostRAPass(PassRegistry&);
 void initializeExpandReductionsPass(PassRegistry&);

diff --git a/llvm/include/llvm/Target/TargetSelectionDAG.td b/llvm/include/llvm/Target/TargetSelectionDAG.td
@@ -770,6 +770,7 @@ def assertsext : SDNode<"ISD::AssertSext", SDT_assert>;
 def assertzext : SDNode<"ISD::AssertZext", SDT_assert>;
 def assertalign : SDNode<"ISD::AssertAlign", SDT_assert>;
 
+def COMPLEX_FMUL : SDNode<"ISD::COMPLEX_FMUL", SDTFPBinOp, [SDNPCommutative]>;
 //===----------------------------------------------------------------------===//
 // Selection DAG Condition Codes
 

diff --git a/llvm/lib/CodeGen/CMakeLists.txt b/llvm/lib/CodeGen/CMakeLists.txt
@@ -68,6 +68,7 @@ add_llvm_component_library(LLVMCodeGen
   EdgeBundles.cpp
   EHContGuardCatchret.cpp
   ExecutionDomainFix.cpp
+  ExpandComplex.cpp
   ExpandLargeDivRem.cpp
   ExpandLargeFpConvert.cpp
   ExpandMemCmp.cpp