Remove fp<->int bitcasting in SystemZ backend.

JonPsson1 · JonPsson1 · commit 305fbe6ffbf5 · 2024-03-12T16:46:43.000-04:00
diff --git a/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
@@ -1761,21 +1761,12 @@ void SystemZDAGToDAGISel::Select(SDNode *Node) {
 
   case ISD::ATOMIC_STORE: {
     auto *AtomOp = cast<AtomicSDNode>(Node);
-    // Store FP values directly without first moving to a GPR. This is needed
-    // as long as clang always emits the cast to integer.
-    EVT SVT = AtomOp->getMemoryVT();
-    SDValue StoredVal = AtomOp->getVal();
-    if (SVT.isInteger() && StoredVal->getOpcode() == ISD::BITCAST &&
-        StoredVal->getOperand(0).getValueType().isFloatingPoint()) {
-      StoredVal = StoredVal->getOperand(0);
-      SVT = StoredVal.getValueType();
-    }
     // Replace the atomic_store with a regular store and select it. This is
     // ok since we know all store instructions <= 8 bytes are atomic, and the
     // 16 byte case is already handled during lowering.
     StoreSDNode *St = cast<StoreSDNode>(CurDAG->getTruncStore(
-        AtomOp->getChain(), SDLoc(AtomOp), StoredVal, AtomOp->getBasePtr(), SVT,
-        AtomOp->getMemOperand()));
+         AtomOp->getChain(), SDLoc(AtomOp), AtomOp->getVal(),
+         AtomOp->getBasePtr(), AtomOp->getMemoryVT(), AtomOp->getMemOperand()));
     assert(St->getMemOperand()->isAtomic() && "Broken MMO.");
     SDNode *Chain = St;
     // We have to enforce sequential consistency by performing a
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -695,8 +695,7 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
   setOperationAction(ISD::GET_ROUNDING, MVT::i32, Custom);
 
   // Codes for which we want to perform some z-specific combinations.
-  setTargetDAGCombine({ISD::BITCAST,
-                       ISD::ZERO_EXTEND,
+  setTargetDAGCombine({ISD::ZERO_EXTEND,
                        ISD::SIGN_EXTEND,
                        ISD::SIGN_EXTEND_INREG,
                        ISD::LOAD,
@@ -916,7 +915,6 @@ bool SystemZTargetLowering::hasInlineStackProbe(const MachineFunction &MF) const
   return false;
 }
 
-// FIXME: Clang emits these casts always regardless of these hooks.
 TargetLowering::AtomicExpansionKind
 SystemZTargetLowering::shouldCastAtomicLoadInIR(LoadInst *LI) const {
   // Lower fp128 the same way as i128.
@@ -6597,32 +6595,6 @@ static SDValue extendAtomicLoad(AtomicSDNode *ALoad, EVT VT, SelectionDAG &DAG,
   return SDValue(NewALoad, 0);
 }
 
-SDValue SystemZTargetLowering::combineBITCAST(SDNode *N,
-                                              DAGCombinerInfo &DCI) const {
-  SelectionDAG &DAG = DCI.DAG;
-  SDValue N0 = N->getOperand(0);
-  EVT InVT = N0.getValueType();
-  EVT ResVT = N->getValueType(0);
-  // Handle atomic loads to load float/double values directly and not via a
-  // GPR. Do it before legalization to help in treating the ATOMIC_LOAD the
-  // same way as a LOAD, and e.g. emit a REPLICATE. FIXME: This is only
-  // needed because clang currently emits these casts always.
-  if (auto *ALoad = dyn_cast<AtomicSDNode>(N0))
-    if (ALoad->getOpcode() == ISD::ATOMIC_LOAD && InVT.getSizeInBits() <= 64 &&
-        ALoad->getExtensionType() == ISD::NON_EXTLOAD &&
-        SDValue(ALoad, 0).hasOneUse() && InVT.isInteger() &&
-        ResVT.isFloatingPoint()) {
-      SDValue Res = DAG.getAtomic(ISD::ATOMIC_LOAD, SDLoc(N), ResVT, ResVT,
-                                  ALoad->getChain(), ALoad->getBasePtr(),
-                                  ALoad->getMemOperand());
-      // Update the chain uses.
-      DAG.ReplaceAllUsesOfValueWith(SDValue(ALoad, 1), Res.getValue(1));
-      return Res;
-    }
-
-  return SDValue();
-}
-
 SDValue SystemZTargetLowering::combineZERO_EXTEND(
     SDNode *N, DAGCombinerInfo &DCI) const {
   // Convert (zext (select_ccmask C1, C2)) into (select_ccmask C1', C2')
@@ -7683,7 +7655,6 @@ SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N,
                                                  DAGCombinerInfo &DCI) const {
   switch(N->getOpcode()) {
   default: break;
-  case ISD::BITCAST:            return combineBITCAST(N, DCI);
   case ISD::ZERO_EXTEND:        return combineZERO_EXTEND(N, DCI);
   case ISD::SIGN_EXTEND:        return combineSIGN_EXTEND(N, DCI);
   case ISD::SIGN_EXTEND_INREG:  return combineSIGN_EXTEND_INREG(N, DCI);
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.h b/llvm/lib/Target/SystemZ/SystemZISelLowering.h
@@ -724,7 +724,6 @@ class SystemZTargetLowering : public TargetLowering {
                          bool Force) const;
   SDValue combineTruncateExtract(const SDLoc &DL, EVT TruncVT, SDValue Op,
                                  DAGCombinerInfo &DCI) const;
-  SDValue combineBITCAST(SDNode *N, DAGCombinerInfo &DCI) const;
   SDValue combineZERO_EXTEND(SDNode *N, DAGCombinerInfo &DCI) const;
   SDValue combineSIGN_EXTEND(SDNode *N, DAGCombinerInfo &DCI) const;
   SDValue combineSIGN_EXTEND_INREG(SDNode *N, DAGCombinerInfo &DCI) const;
diff --git a/llvm/lib/Target/SystemZ/SystemZOperators.td b/llvm/lib/Target/SystemZ/SystemZOperators.td
@@ -538,7 +538,7 @@ def zext32 : PatFrag<(ops node:$src), (zext (i32 node:$src))>;
 def z_load : PatFrags<(ops node:$ptr),
                       [(load node:$ptr),
                        (atomic_load node:$ptr)], [{
-  if (auto *AL = dyn_cast<AtomicSDNode>(N))  // XXXX getLoadExtType?
+  if (auto *AL = dyn_cast<AtomicSDNode>(N))
     if (AL->getExtensionType() != ISD::NON_EXTLOAD)
       return false;
   return true;
@@ -680,7 +680,7 @@ def z_any_extloadf64 : PatFrags<(ops node:$ptr),
 class AlignedLoad<SDPatternOperator load>
   : PatFrag<(ops node:$addr), (load node:$addr),
   [{ return storeLoadIsAligned(N); }]>;
-def aligned_z_load       : AlignedLoad<z_load>;
+def aligned_z_load         : AlignedLoad<z_load>;
 def aligned_z_asextloadi16 : AlignedLoad<z_asextloadi16>;
 def aligned_z_asextloadi32 : AlignedLoad<z_asextloadi32>;
 def aligned_z_azextloadi16 : AlignedLoad<z_azextloadi16>;
diff --git a/llvm/lib/Target/VE/VEInstrInfo.td b/llvm/lib/Target/VE/VEInstrInfo.td
@@ -1785,7 +1785,7 @@ defm : TRUNC64m<truncstorei8, ST1Brri, ST1Brii, ST1Bzri, ST1Bzii>;
 defm : TRUNC64m<truncstorei16, ST2Brri, ST2Brii, ST2Bzri, ST2Bzii>;
 defm : TRUNC64m<truncstorei32, STLrri, STLrii, STLzri, ST1Bzii>;
 
-// Atomic loads
+// Atomic loads (FIXME: replace iAny with the correct integer VT)
 multiclass ATMLDm<SDPatternOperator from,
                   RM torri, RM torii,
                   RM tozri, RM tozii> {
diff --git a/llvm/test/CodeGen/SystemZ/atomic-memops.ll b/llvm/test/CodeGen/SystemZ/atomic-memops.ll
@@ -408,19 +408,6 @@ define void @f25(ptr %src, ptr %dst) {
   ret void
 }
 
-define void @f25_b(ptr %src, ptr %dst) {
-; CHECK-LABEL: f25_b:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vlrepf %v0, 0(%r2)
-; CHECK-NEXT:    vst %v0, 0(%r3), 3
-; CHECK-NEXT:    br %r14
-  %l = load atomic i32, ptr %src seq_cst, align 4
-  %b = bitcast i32 %l to float
-  %v = insertelement <4 x float> undef, float %b, i32 1
-  store volatile <4 x float> %v, ptr %dst
-  ret void
-}
-
 ; Do *not* use vlrep for an extending load.
 define <4 x i32> @f25_c(ptr %ptr) {
 ; CHECK-LABEL: f25_c:
@@ -466,21 +453,6 @@ define void @f26(ptr %src, ptr %dst) {
   ret void
 }
 
-define void @f26_b(ptr %src, ptr %dst) {
-; CHECK-LABEL: f26_b:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vlrepg %v0, 0(%r2)
-; CHECK-NEXT:    vst %v0, 0(%r3), 3
-; CHECK-NEXT:    br %r14
-  %l = load atomic i64, ptr %src seq_cst, align 8
-  %b = bitcast i64 %l to double
-  %v = insertelement <2 x double> undef, double %b, i32 0
-  store volatile <2 x double> %v, ptr %dst
-  ret void
-}
-
-
-
 ; Vector Load logical element and zero.
 define <16 x i8> @f27(ptr %ptr) {
 ; CHECK-LABEL: f27:
@@ -583,40 +555,6 @@ define <2 x i64> @f36(<2 x i64> %val, ptr %ptr) {
   ret <2 x i64> %ret
 }
 
-; Test that fp values are loaded/stored directly. Clang FE currently always
-; emits atomic load/stores casted this way.
-define void @f37(ptr %src, ptr %dst) {
-; CHECK-LABEL: f37:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    ld %f0, 0(%r2)
-; CHECK-NEXT:    adbr %f0, %f0
-; CHECK-NEXT:    std %f0, 0(%r3)
-; CHECK-NEXT:    bcr 14, %r0
-; CHECK-NEXT:    br %r14
-  %atomic-load = load atomic i64, ptr %src seq_cst, align 8
-  %bc0 = bitcast i64 %atomic-load to double
-  %fa = fadd double %bc0, %bc0
-  %bc1 = bitcast double %fa to i64
-  store atomic i64 %bc1, ptr %dst seq_cst, align 8
-  ret void
-}
-
-define void @f38(ptr %src, ptr %dst) {
-; CHECK-LABEL: f38:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lde %f0, 0(%r2)
-; CHECK-NEXT:    aebr %f0, %f0
-; CHECK-NEXT:    ste %f0, 0(%r3)
-; CHECK-NEXT:    bcr 14, %r0
-; CHECK-NEXT:    br %r14
-  %atomic-load = load atomic i32, ptr %src seq_cst, align 8
-  %bc0 = bitcast i32 %atomic-load to float
-  %fa = fadd float %bc0, %bc0
-  %bc1 = bitcast float %fa to i32
-  store atomic i32 %bc1, ptr %dst seq_cst, align 8
-  ret void
-}
-
 ; Test operation on memory involving atomic load and store.
 define void @f39(ptr %ptr) {
 ; CHECK-LABEL: f39:
@@ -676,7 +614,7 @@ define void @f43(ptr %ptr) {
 define void @f44(ptr %ptr) {
 ; CHECK-LABEL: f44:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    larl %r1, .LCPI53_0
+; CHECK-NEXT:    larl %r1, .LCPI49_0
 ; CHECK-NEXT:    ld %f0, 0(%r1)
 ; CHECK-NEXT:    std %f0, 0(%r2)
 ; CHECK-NEXT:    bcr 14, %r0
@@ -767,8 +705,7 @@ define void @f51(ptr %src, ptr %dst) {
   %b0 = bitcast i128 %atomic-load to <4 x float>
   %vecext = extractelement <4 x float> %b0, i64 0
   %add = fadd float %vecext, 1.000000e+00
-  %b1 = bitcast float %add to i32
-  store atomic i32 %b1, ptr %dst seq_cst, align 4
+  store atomic float %add, ptr %dst seq_cst, align 4
   ret void
 }
 
@@ -786,8 +723,7 @@ define void @f52(ptr %src, ptr %dst) {
   %b0 = bitcast i128 %atomic-load to <2 x double>
   %vecext = extractelement <2 x double> %b0, i64 0
   %add = fadd double %vecext, 1.000000e+00
-  %b1 = bitcast double %add to i64
-  store atomic i64 %b1, ptr %dst seq_cst, align 8
+  store atomic double %add, ptr %dst seq_cst, align 8
   ret void
 }