[AArch64] Optimize SVE bitcasts of unpacked types.

efriedma-quic · efriedma-quic · commit 0176ac95035e · 2021-07-01T15:35:48.000-07:00
Target-independent code only knows how to spill to the stack; instead, use AArch64ISD::REINTERPRET_CAST. Differential Revision: https://reviews.llvm.org/D104573
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1192,6 +1192,10 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
       setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
     }
 
+    // Legalize unpacked bitcasts to REINTERPRET_CAST.
+    for (auto VT : {MVT::nxv2i32, MVT::nxv2f32})
+      setOperationAction(ISD::BITCAST, VT, Custom);
+
     for (auto VT : {MVT::nxv16i1, MVT::nxv8i1, MVT::nxv4i1, MVT::nxv2i1}) {
       setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
       setOperationAction(ISD::SELECT, VT, Custom);
@@ -3508,17 +3512,30 @@ SDValue AArch64TargetLowering::LowerFSINCOS(SDValue Op,
   return CallResult.first;
 }
 
+static MVT getSVEContainerType(EVT ContentTy);
+
 SDValue AArch64TargetLowering::LowerBITCAST(SDValue Op,
                                             SelectionDAG &DAG) const {
   EVT OpVT = Op.getValueType();
+  EVT ArgVT = Op.getOperand(0).getValueType();
 
   if (useSVEForFixedLengthVectorVT(OpVT))
     return LowerFixedLengthBitcastToSVE(Op, DAG);
 
+  if (OpVT == MVT::nxv2f32) {
+    if (ArgVT.isInteger()) {
+      SDValue ExtResult =
+          DAG.getNode(ISD::ANY_EXTEND, SDLoc(Op), getSVEContainerType(ArgVT),
+                      Op.getOperand(0));
+      return getSVESafeBitCast(MVT::nxv2f32, ExtResult, DAG);
+    }
+    return getSVESafeBitCast(MVT::nxv2f32, Op.getOperand(0), DAG);
+  }
+
   if (OpVT != MVT::f16 && OpVT != MVT::bf16)
     return SDValue();
 
-  assert(Op.getOperand(0).getValueType() == MVT::i16);
+  assert(ArgVT == MVT::i16);
   SDLoc DL(Op);
 
   Op = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op.getOperand(0));
@@ -16866,11 +16883,18 @@ bool AArch64TargetLowering::getPostIndexedAddressParts(
   return true;
 }
 
-static void ReplaceBITCASTResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
-                                  SelectionDAG &DAG) {
+void AArch64TargetLowering::ReplaceBITCASTResults(
+    SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
   SDLoc DL(N);
   SDValue Op = N->getOperand(0);
 
+  if (N->getValueType(0) == MVT::nxv2i32 &&
+      Op.getValueType().isFloatingPoint()) {
+    SDValue CastResult = getSVESafeBitCast(MVT::nxv2i64, Op, DAG);
+    Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::nxv2i32, CastResult));
+    return;
+  }
+
   if (N->getValueType(0) != MVT::i16 ||
       (Op.getValueType() != MVT::f16 && Op.getValueType() != MVT::bf16))
     return;
@@ -18428,8 +18452,6 @@ SDValue AArch64TargetLowering::getSVESafeBitCast(EVT VT, SDValue Op,
 
   EVT PackedVT = getPackedSVEVectorVT(VT.getVectorElementType());
   EVT PackedInVT = getPackedSVEVectorVT(InVT.getVectorElementType());
-  assert((VT == PackedVT || InVT == PackedInVT) &&
-         "Cannot cast between unpacked scalable vector types!");
 
   // Pack input if required.
   if (InVT != PackedInVT)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -1066,6 +1066,8 @@ class AArch64TargetLowering : public TargetLowering {
 
   void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
                           SelectionDAG &DAG) const override;
+  void ReplaceBITCASTResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
+                             SelectionDAG &DAG) const;
   void ReplaceExtractSubVectorResults(SDNode *N,
                                       SmallVectorImpl<SDValue> &Results,
                                       SelectionDAG &DAG) const;
diff --git a/llvm/test/CodeGen/AArch64/sve-bitcast.ll b/llvm/test/CodeGen/AArch64/sve-bitcast.ll
@@ -450,5 +450,39 @@ define <vscale x 8 x bfloat> @bitcast_double_to_bfloat(<vscale x 2 x double> %v)
   ret <vscale x 8 x bfloat> %bc
 }
 
+define <vscale x 2 x i32> @bitcast_short_float_to_i32(<vscale x 2 x double> %v) #0 {
+; CHECK-LABEL: bitcast_short_float_to_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    fcvt z0.s, p0/m, z0.d
+; CHECK-NEXT:    ret
+  %trunc = fptrunc <vscale x 2 x double> %v to <vscale x 2 x float>
+  %bitcast = bitcast <vscale x 2 x float> %trunc to <vscale x 2 x i32>
+  ret <vscale x 2 x i32> %bitcast
+}
+
+define <vscale x 2 x double> @bitcast_short_i32_to_float(<vscale x 2 x i64> %v) #0 {
+; CHECK-LABEL: bitcast_short_i32_to_float:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    fcvt z0.d, p0/m, z0.s
+; CHECK-NEXT:    ret
+  %trunc = trunc <vscale x 2 x i64> %v to <vscale x 2 x i32>
+  %bitcast = bitcast <vscale x 2 x i32> %trunc to <vscale x 2 x float>
+  %extended = fpext <vscale x 2 x float> %bitcast to <vscale x 2 x double>
+  ret <vscale x 2 x double> %extended
+}
+
+define <vscale x 2 x float> @bitcast_short_half_to_float(<vscale x 4 x half> %v) #0 {
+; CHECK-LABEL: bitcast_short_half_to_float:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    fadd z0.h, p0/m, z0.h, z0.h
+; CHECK-NEXT:    ret
+  %add = fadd <vscale x 4 x half> %v, %v
+  %bitcast = bitcast <vscale x 4 x half> %add to <vscale x 2 x float>
+  ret <vscale x 2 x float> %bitcast
+}
+
 ; +bf16 is required for the bfloat version.
 attributes #0 = { "target-features"="+sve,+bf16" }