[CodeGen] Allow mixed scalar type constraints for inline asm

dfszabo · dfszabo · commit 0a3516b2a3e1 · 2024-02-29T20:25:48.000+01:00
GCC accepts code like `asm volatile ("" : "=r" (i) : "0" (f))`, where `i` has an integer type and `f` has a floating-point type. Clang currently rejects this code with an error. This change allows an input constraint and its matching output constraint to have different scalar types (integer vs. floating point).
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -9041,8 +9041,11 @@ static void patchMatchingInput(const SDISelAsmOperandInfo &OpInfo,
   std::pair<unsigned, const TargetRegisterClass *> InputRC =
       TLI.getRegForInlineAsmConstraint(TRI, MatchingOpInfo.ConstraintCode,
                                        MatchingOpInfo.ConstraintVT);
-  if ((OpInfo.ConstraintVT.isInteger() !=
-       MatchingOpInfo.ConstraintVT.isInteger()) ||
+  const bool OutOpIsIntOrFP =
+      OpInfo.ConstraintVT.isInteger() || OpInfo.ConstraintVT.isFloatingPoint();
+  const bool InOpInfoIsIntOrFP = MatchingOpInfo.ConstraintVT.isInteger() ||
+                                 MatchingOpInfo.ConstraintVT.isFloatingPoint();
+  if ((OutOpIsIntOrFP != InOpInfoIsIntOrFP) ||
       (MatchRC.second != InputRC.second)) {
     // FIXME: error out in a more elegant fashion
     report_fatal_error("Unsupported asm: input constraint"
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -5772,8 +5772,11 @@ TargetLowering::ParseConstraints(const DataLayout &DL,
         std::pair<unsigned, const TargetRegisterClass *> InputRC =
             getRegForInlineAsmConstraint(TRI, Input.ConstraintCode,
                                          Input.ConstraintVT);
-        if ((OpInfo.ConstraintVT.isInteger() !=
-             Input.ConstraintVT.isInteger()) ||
+        const bool OutOpIsIntOrFP = OpInfo.ConstraintVT.isInteger() ||
+                                    OpInfo.ConstraintVT.isFloatingPoint();
+        const bool InOpIsIntOrFP = Input.ConstraintVT.isInteger() ||
+                                   Input.ConstraintVT.isFloatingPoint();
+        if ((OutOpIsIntOrFP != InOpIsIntOrFP) ||
             (MatchRC.second != InputRC.second)) {
           report_fatal_error("Unsupported asm: input constraint"
                              " with a matching output constraint of"
diff --git a/llvm/test/CodeGen/X86/inline-asm-int-to-fp.ll b/llvm/test/CodeGen/X86/inline-asm-int-to-fp.ll
@@ -0,0 +1,72 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s
+
+; C source from which @test_int_float below was derived:
+
+; unsigned test(float f)
+; {
+;    unsigned i;
+;    asm volatile ("" : "=r" (i) : "0" (f));
+;    return i;
+; }
+
+
+define i32 @test_int_float(float %f) {
+; CHECK-LABEL: test_int_float:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movss %xmm0, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    movl -{{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    movl %eax, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    retq
+entry:
+  %f.addr = alloca float, align 4
+  %i = alloca i32, align 4
+  store float %f, ptr %f.addr, align 4
+  %load_f = load float, ptr %f.addr, align 4
+  %asm_call = call i32 asm sideeffect "", "=r,0,~{dirflag},~{fpsr},~{flags}"(float %load_f) ; input constraint "0" ties the float operand to the i32 "=r" output
+  store i32 %asm_call, ptr %i, align 4
+  %load_i = load i32, ptr %i, align 4
+  ret i32 %load_i
+}
+
+define i32 @test_int_ptr(float* %f) {
+; CHECK-LABEL: test_int_ptr:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movq %rdi, %rax
+; CHECK-NEXT:    movq %rdi, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    movl %eax, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    # kill: def $eax killed $eax killed $rax
+; CHECK-NEXT:    retq
+entry:
+  %f.addr = alloca float*, align 4
+  %i = alloca i32, align 4
+  store float* %f, ptr %f.addr, align 4
+  %load_f = load float*, ptr %f.addr, align 4
+  %asm_call = call i32 asm sideeffect "", "=r,0,~{dirflag},~{fpsr},~{flags}"(float* %load_f) ; input constraint "0" ties the pointer operand to the i32 "=r" output
+  store i32 %asm_call, ptr %i, align 4
+  %load_i = load i32, ptr %i, align 4
+  ret i32 %load_i
+}
+
+define <4 x i32> @test_int_vec(<4 x float> %f) {
+; CHECK-LABEL: test_int_vec:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    retq
+entry:
+  %f.addr = alloca <4 x float>, align 16
+  %i = alloca <4 x i32>, align 16
+  store <4 x float> %f, <4 x float>* %f.addr, align 16
+  %load_f = load <4 x float>, <4 x float>* %f.addr, align 16
+  %asm_call = call <4 x i32> asm sideeffect "", "=v,0,~{dirflag},~{fpsr},~{flags}"(<4 x float> %load_f) ; input constraint "0" ties the <4 x float> operand to the <4 x i32> "=v" output
+  store <4 x i32> %asm_call, <4 x i32>* %i, align 16
+  %load_i = load <4 x i32>, <4 x i32>* %i, align 16
+  ret <4 x i32> %load_i
+}