
Commit c67ed2f

[SelectionDAG][RISCV] Use TypeSize version of ComputeValueVTs in TargetLowering::LowerCallTo. (#86166)
This is needed to support non-intrinsic functions returning tuple types, which are represented as structs with scalable vector types in IR. I suspect this may have been broken since https://reviews.llvm.org/D158115
1 parent: 4865dab
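
As background (not part of the commit): a struct of scalable vectors has element offsets that are multiples of the runtime vscale, which a plain uint64_t byte count cannot express. A minimal sketch of the distinction, assuming only the llvm::TypeSize API from llvm/Support/TypeSize.h; the function and values are illustrative:

```cpp
#include "llvm/Support/TypeSize.h"
#include <cassert>

using namespace llvm;

// Illustrative only: field offsets of {<vscale x 4 x i32>, <vscale x 4 x i32>},
// the tuple type exercised by the new test below.
void tupleFieldOffsets() {
  // <vscale x 4 x i32> occupies a known minimum of 16 bytes, scaled by the
  // runtime vscale, so the second field starts at 16 * vscale bytes.
  TypeSize Off0 = TypeSize::getScalable(0);
  TypeSize Off1 = TypeSize::getScalable(16);
  assert(Off1.isScalable() && Off1.getKnownMinValue() == 16);

  // Fixed offsets remain representable; TypeSize::getFixed is what the patch
  // uses on the post-type-legalization libcall path, where sizes are fixed.
  TypeSize FixedOff = TypeSize::getFixed(8);
  assert(!FixedOff.isScalable() && FixedOff.getKnownMinValue() == 8);

  (void)Off0;
  (void)FixedOff;
}
```

Switching the Offsets vectors from uint64_t to TypeSize lets LowerCallTo carry such scalable offsets through return-value lowering, which the old element type could not represent.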

2 files changed (+80, -4 lines)


llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp

Lines changed: 4 additions & 4 deletions
```diff
@@ -10500,14 +10500,14 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
   CLI.Ins.clear();
   Type *OrigRetTy = CLI.RetTy;
   SmallVector<EVT, 4> RetTys;
-  SmallVector<uint64_t, 4> Offsets;
+  SmallVector<TypeSize, 4> Offsets;
   auto &DL = CLI.DAG.getDataLayout();
-  ComputeValueVTs(*this, DL, CLI.RetTy, RetTys, &Offsets, 0);
+  ComputeValueVTs(*this, DL, CLI.RetTy, RetTys, &Offsets);
 
   if (CLI.IsPostTypeLegalization) {
     // If we are lowering a libcall after legalization, split the return type.
     SmallVector<EVT, 4> OldRetTys;
-    SmallVector<uint64_t, 4> OldOffsets;
+    SmallVector<TypeSize, 4> OldOffsets;
     RetTys.swap(OldRetTys);
     Offsets.swap(OldOffsets);
 
@@ -10519,7 +10519,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
       unsigned RegisterVTByteSZ = RegisterVT.getSizeInBits() / 8;
       RetTys.append(NumRegs, RegisterVT);
      for (unsigned j = 0; j != NumRegs; ++j)
-        Offsets.push_back(Offset + j * RegisterVTByteSZ);
+        Offsets.push_back(TypeSize::getFixed(Offset + j * RegisterVTByteSZ));
     }
   }
 
```
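
To see what the change produces, a minimal sketch follows; the helper dumpValueVTs is hypothetical, and it assumes the SmallVectorImpl&lt;TypeSize&gt; overload of ComputeValueVTs from llvm/CodeGen/Analysis.h that the patched call resolves to. For {<vscale x 4 x i32>, <vscale x 4 x i32>}, this should report two nxv4i32 values at scalable minimum byte offsets 0 and 16.

```cpp
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/Debug.h"

using namespace llvm;

// Hypothetical debugging helper: print how a return type decomposes into
// legal value types and (possibly scalable) byte offsets.
static void dumpValueVTs(const TargetLowering &TLI, const DataLayout &DL,
                         Type *RetTy) {
  SmallVector<EVT, 4> ValueVTs;
  SmallVector<TypeSize, 4> Offsets; // was SmallVector<uint64_t, 4> pre-patch
  ComputeValueVTs(TLI, DL, RetTy, ValueVTs, &Offsets);
  for (unsigned I = 0, E = ValueVTs.size(); I != E; ++I)
    dbgs() << ValueVTs[I].getEVTString() << " at min offset "
           << Offsets[I].getKnownMinValue()
           << (Offsets[I].isScalable() ? " (scalable)\n" : " (fixed)\n");
}
```

The call shape matches the patched line in LowerCallTo, which relies on the overload's default starting offset of zero.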

llvm/test/CodeGen/RISCV/rvv/calling-conv.ll

Lines changed: 76 additions & 0 deletions
```diff
@@ -86,3 +86,79 @@ define <vscale x 32 x i32> @caller_scalable_vector_split_indirect(<vscale x 32 x
   %a = call <vscale x 32 x i32> @callee_scalable_vector_split_indirect(<vscale x 32 x i32> zeroinitializer, <vscale x 32 x i32> %x)
   ret <vscale x 32 x i32> %a
 }
+
+define {<vscale x 4 x i32>, <vscale x 4 x i32>} @caller_tuple_return() {
+; RV32-LABEL: caller_tuple_return:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-NEXT:    .cfi_offset ra, -4
+; RV32-NEXT:    call callee_tuple_return
+; RV32-NEXT:    vmv2r.v v12, v8
+; RV32-NEXT:    vmv2r.v v8, v10
+; RV32-NEXT:    vmv2r.v v10, v12
+; RV32-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: caller_tuple_return:
+; RV64:       # %bb.0:
+; RV64-NEXT:    addi sp, sp, -16
+; RV64-NEXT:    .cfi_def_cfa_offset 16
+; RV64-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-NEXT:    .cfi_offset ra, -8
+; RV64-NEXT:    call callee_tuple_return
+; RV64-NEXT:    vmv2r.v v12, v8
+; RV64-NEXT:    vmv2r.v v8, v10
+; RV64-NEXT:    vmv2r.v v10, v12
+; RV64-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-NEXT:    addi sp, sp, 16
+; RV64-NEXT:    ret
+  %a = call {<vscale x 4 x i32>, <vscale x 4 x i32>} @callee_tuple_return()
+  %b = extractvalue {<vscale x 4 x i32>, <vscale x 4 x i32>} %a, 0
+  %c = extractvalue {<vscale x 4 x i32>, <vscale x 4 x i32>} %a, 1
+  %d = insertvalue {<vscale x 4 x i32>, <vscale x 4 x i32>} poison, <vscale x 4 x i32> %c, 0
+  %e = insertvalue {<vscale x 4 x i32>, <vscale x 4 x i32>} %d, <vscale x 4 x i32> %b, 1
+  ret {<vscale x 4 x i32>, <vscale x 4 x i32>} %e
+}
+
+declare {<vscale x 4 x i32>, <vscale x 4 x i32>} @callee_tuple_return()
+
+define void @caller_tuple_argument({<vscale x 4 x i32>, <vscale x 4 x i32>} %x) {
+; RV32-LABEL: caller_tuple_argument:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-NEXT:    .cfi_offset ra, -4
+; RV32-NEXT:    vmv2r.v v12, v8
+; RV32-NEXT:    vmv2r.v v8, v10
+; RV32-NEXT:    vmv2r.v v10, v12
+; RV32-NEXT:    call callee_tuple_argument
+; RV32-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: caller_tuple_argument:
+; RV64:       # %bb.0:
+; RV64-NEXT:    addi sp, sp, -16
+; RV64-NEXT:    .cfi_def_cfa_offset 16
+; RV64-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-NEXT:    .cfi_offset ra, -8
+; RV64-NEXT:    vmv2r.v v12, v8
+; RV64-NEXT:    vmv2r.v v8, v10
+; RV64-NEXT:    vmv2r.v v10, v12
+; RV64-NEXT:    call callee_tuple_argument
+; RV64-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-NEXT:    addi sp, sp, 16
+; RV64-NEXT:    ret
+  %a = extractvalue {<vscale x 4 x i32>, <vscale x 4 x i32>} %x, 0
+  %b = extractvalue {<vscale x 4 x i32>, <vscale x 4 x i32>} %x, 1
+  %c = insertvalue {<vscale x 4 x i32>, <vscale x 4 x i32>} poison, <vscale x 4 x i32> %b, 0
+  %d = insertvalue {<vscale x 4 x i32>, <vscale x 4 x i32>} %c, <vscale x 4 x i32> %a, 1
+  call void @callee_tuple_argument({<vscale x 4 x i32>, <vscale x 4 x i32>} %d)
+  ret void
+}
+
+declare void @callee_tuple_argument({<vscale x 4 x i32>, <vscale x 4 x i32>})
```
