
Commit 2f497ec

[ARM] Fix ARM backend to correctly use atomic expansion routines.
Without this patch, clang would generate calls to __sync_* routines on targets where it does not make sense; we can't assume the routines exist on unknown targets. Linux has special implementations of the routines that work on old ARM targets; other targets have no such routines. In general, atomic operations which aren't natively supported should go through libatomic (__atomic_*) APIs, which can support arbitrary atomics through locks.

ARM targets older than v6, where this patch makes a difference, are rare in practice, but not completely extinct. See, for example, discussion on D116088.

This also affects Cortex-M0, but I don't think __sync_* routines actually exist in any Cortex-M0 libraries. So in practice this just leads to a slightly different linker error for those cases, I think.

Mechanically, this patch does the following:

- Ensures we run atomic expansion unconditionally; it never makes sense to completely skip it.
- Fixes getMaxAtomicSizeInBitsSupported() so it returns an appropriate number on all ARM subtargets.
- Fixes shouldExpandAtomicRMWInIR() and shouldExpandAtomicCmpXchgInIR() to correctly handle subtargets that don't have atomic instructions.

Differential Revision: https://reviews.llvm.org/D120026
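To make the behavioral change concrete, here is a minimal sketch (the target triple and function name are illustrative, not taken from the patch): on a pre-v6, non-Linux ARM target a 64-bit seq_cst atomic load is now lowered through libatomic rather than the __sync_* ABI, matching the updated ARMV4 checks in llvm/test/CodeGen/ARM/atomic-load-store.ll.

; Illustrative IR only; the triple is an assumption for the example.
target triple = "armv4t-none-eabi"

define i64 @load64(i64* %p) {
  ; Old lowering: a call to __sync_val_compare_and_swap_8, which may not exist.
  ; New lowering: a call to the libatomic routine __atomic_load_8.
  %v = load atomic i64, i64* %p seq_cst, align 8
  ret i64 %v
}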
1 parent 3ac84c4 commit 2f497ec

8 files changed: 104 additions & 79 deletions

llvm/lib/Target/ARM/ARMISelLowering.cpp

Lines changed: 48 additions & 14 deletions
@@ -1369,6 +1369,29 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
     }
   }
 
+  // Compute supported atomic widths.
+  if (Subtarget->isTargetLinux() ||
+      (!Subtarget->isMClass() && Subtarget->hasV6Ops())) {
+    // For targets where __sync_* routines are reliably available, we use them
+    // if necessary.
+    //
+    // ARM Linux always supports 64-bit atomics through kernel-assisted atomic
+    // routines (kernel 3.1 or later). FIXME: Not with compiler-rt?
+    //
+    // ARMv6 targets have native instructions in ARM mode. For Thumb mode,
+    // such targets should provide __sync_* routines, which use the ARM mode
+    // instructions. (ARMv6 doesn't have dmb, but it has an equivalent
+    // encoding; see ARMISD::MEMBARRIER_MCR.)
+    setMaxAtomicSizeInBitsSupported(64);
+  } else if (Subtarget->isMClass() && Subtarget->hasV8MBaselineOps()) {
+    // Cortex-M (besides Cortex-M0) have 32-bit atomics.
+    setMaxAtomicSizeInBitsSupported(32);
+  } else {
+    // We can't assume anything about other targets; just use libatomic
+    // routines.
+    setMaxAtomicSizeInBitsSupported(0);
+  }
+
   setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
 
   // Requires SXTB/SXTH, available on v6 and up in both ARM and Thumb modes.
@@ -20978,19 +21001,25 @@ ARMTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
   if (AI->isFloatingPointOperation())
     return AtomicExpansionKind::CmpXChg;
 
-  // At -O0, fast-regalloc cannot cope with the live vregs necessary to
-  // implement atomicrmw without spilling. If the target address is also on the
-  // stack and close enough to the spill slot, this can lead to a situation
-  // where the monitor always gets cleared and the atomic operation can never
-  // succeed. So at -O0 lower this operation to a CAS loop.
-  if (getTargetMachine().getOptLevel() == CodeGenOpt::None)
-    return AtomicExpansionKind::CmpXChg;
-
   unsigned Size = AI->getType()->getPrimitiveSizeInBits();
-  bool hasAtomicRMW = !Subtarget->isThumb() || Subtarget->hasV8MBaselineOps();
-  return (Size <= (Subtarget->isMClass() ? 32U : 64U) && hasAtomicRMW)
-             ? AtomicExpansionKind::LLSC
-             : AtomicExpansionKind::None;
+  bool hasAtomicRMW;
+  if (Subtarget->isMClass())
+    hasAtomicRMW = Subtarget->hasV8MBaselineOps();
+  else if (Subtarget->isThumb())
+    hasAtomicRMW = Subtarget->hasV7Ops();
+  else
+    hasAtomicRMW = Subtarget->hasV6Ops();
+  if (Size <= (Subtarget->isMClass() ? 32U : 64U) && hasAtomicRMW) {
+    // At -O0, fast-regalloc cannot cope with the live vregs necessary to
+    // implement atomicrmw without spilling. If the target address is also on
+    // the stack and close enough to the spill slot, this can lead to a
+    // situation where the monitor always gets cleared and the atomic operation
+    // can never succeed. So at -O0 lower this operation to a CAS loop.
+    if (getTargetMachine().getOptLevel() == CodeGenOpt::None)
+      return AtomicExpansionKind::CmpXChg;
+    return AtomicExpansionKind::LLSC;
+  }
+  return AtomicExpansionKind::None;
 }
 
 // Similar to shouldExpandAtomicRMWInIR, ldrex/strex can be used up to 32
@@ -21003,8 +21032,13 @@ ARMTargetLowering::shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const {
   // situation where the monitor always gets cleared and the atomic operation
   // can never succeed. So at -O0 we need a late-expanded pseudo-inst instead.
   unsigned Size = AI->getOperand(1)->getType()->getPrimitiveSizeInBits();
-  bool HasAtomicCmpXchg =
-      !Subtarget->isThumb() || Subtarget->hasV8MBaselineOps();
+  bool HasAtomicCmpXchg;
+  if (Subtarget->isMClass())
+    HasAtomicCmpXchg = Subtarget->hasV8MBaselineOps();
+  else if (Subtarget->isThumb())
+    HasAtomicCmpXchg = Subtarget->hasV7Ops();
+  else
+    HasAtomicCmpXchg = Subtarget->hasV6Ops();
   if (getTargetMachine().getOptLevel() != 0 && HasAtomicCmpXchg &&
       Size <= (Subtarget->isMClass() ? 32U : 64U))
     return AtomicExpansionKind::LLSC;
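For subtargets that end up in the setMaxAtomicSizeInBitsSupported(0) case above, AtomicExpandPass rewrites atomic operations into libatomic calls. A rough sketch of the shape of that expansion for a 64-bit atomicrmw add follows; the function names are illustrative, the helper __atomic_fetch_add_8 and the memory-order constant 5 (seq_cst) follow the libatomic ABI, and casts and attributes are simplified.

; Input IR:
define i64 @fetch_add(i64* %p, i64 %v) {
  %old = atomicrmw add i64* %p, i64 %v seq_cst
  ret i64 %old
}

; Approximate shape after expansion to a libatomic call:
declare i64 @__atomic_fetch_add_8(i8*, i64, i32)

define i64 @fetch_add_expanded(i64* %p, i64 %v) {
  %addr = bitcast i64* %p to i8*
  ; 5 corresponds to seq_cst ordering.
  %old = call i64 @__atomic_fetch_add_8(i8* %addr, i64 %v, i32 5)
  ret i64 %old
}

Operations without a dedicated libatomic entry point (min, max, umin, umax) are instead expanded to a compare-exchange loop, which is why the corresponding tests in atomic-64bit.ll below now check for __atomic_compare_exchange_8.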

llvm/lib/Target/ARM/ARMSubtarget.cpp

Lines changed: 0 additions & 2 deletions
@@ -411,8 +411,6 @@ bool ARMSubtarget::enablePostRAMachineScheduler() const {
   return !isThumb1Only();
 }
 
-bool ARMSubtarget::enableAtomicExpand() const { return hasAnyDataBarrier(); }
-
 bool ARMSubtarget::useStride4VFPs() const {
   // For general targets, the prologue can grow when VFPs are allocated with
   // stride 4 (more vpush instructions). But WatchOS uses a compact unwind

llvm/lib/Target/ARM/ARMSubtarget.h

Lines changed: 0 additions & 3 deletions
@@ -478,9 +478,6 @@ class ARMSubtarget : public ARMGenSubtargetInfo {
   /// scheduling, DAGCombine, etc.).
   bool useAA() const override { return true; }
 
-  // enableAtomicExpand- True if we need to expand our atomics.
-  bool enableAtomicExpand() const override;
-
   /// getInstrItins - Return the instruction itineraries based on subtarget
   /// selection.
   const InstrItineraryData *getInstrItineraryData() const override {

llvm/test/CodeGen/ARM/atomic-64bit.ll

Lines changed: 13 additions & 13 deletions
@@ -30,7 +30,7 @@ define i64 @test1(i64* %ptr, i64 %val) {
 ; CHECK-THUMB: bne
 ; CHECK-THUMB: dmb {{ish$}}
 
-; CHECK-M: __sync_fetch_and_add_8
+; CHECK-M: __atomic_fetch_add_8
 
   %r = atomicrmw add i64* %ptr, i64 %val seq_cst
   ret i64 %r
@@ -61,7 +61,7 @@ define i64 @test2(i64* %ptr, i64 %val) {
 ; CHECK-THUMB: bne
 ; CHECK-THUMB: dmb {{ish$}}
 
-; CHECK-M: __sync_fetch_and_sub_8
+; CHECK-M: __atomic_fetch_sub_8
 
   %r = atomicrmw sub i64* %ptr, i64 %val seq_cst
   ret i64 %r
@@ -92,7 +92,7 @@ define i64 @test3(i64* %ptr, i64 %val) {
 ; CHECK-THUMB: bne
 ; CHECK-THUMB: dmb {{ish$}}
 
-; CHECK-M: __sync_fetch_and_and_8
+; CHECK-M: __atomic_fetch_and_8
 
   %r = atomicrmw and i64* %ptr, i64 %val seq_cst
   ret i64 %r
@@ -123,7 +123,7 @@ define i64 @test4(i64* %ptr, i64 %val) {
 ; CHECK-THUMB: bne
 ; CHECK-THUMB: dmb {{ish$}}
 
-; CHECK-M: __sync_fetch_and_or_8
+; CHECK-M: __atomic_fetch_or_8
 
   %r = atomicrmw or i64* %ptr, i64 %val seq_cst
   ret i64 %r
@@ -154,7 +154,7 @@ define i64 @test5(i64* %ptr, i64 %val) {
 ; CHECK-THUMB: bne
 ; CHECK-THUMB: dmb {{ish$}}
 
-; CHECK-M: __sync_fetch_and_xor_8
+; CHECK-M: __atomic_fetch_xor_8
 
   %r = atomicrmw xor i64* %ptr, i64 %val seq_cst
   ret i64 %r
@@ -177,7 +177,7 @@ define i64 @test6(i64* %ptr, i64 %val) {
 ; CHECK-THUMB: bne
 ; CHECK-THUMB: dmb {{ish$}}
 
-; CHECK-M: __sync_lock_test_and_set_8
+; CHECK-M: __atomic_exchange_8
 
   %r = atomicrmw xchg i64* %ptr, i64 %val seq_cst
   ret i64 %r
@@ -213,7 +213,7 @@ define i64 @test7(i64* %ptr, i64 %val1, i64 %val2) {
 ; CHECK-THUMB: beq
 ; CHECK-THUMB: dmb {{ish$}}
 
-; CHECK-M: __sync_val_compare_and_swap_8
+; CHECK-M: __atomic_compare_exchange_8
 
   %pair = cmpxchg i64* %ptr, i64 %val1, i64 %val2 seq_cst seq_cst
   %r = extractvalue { i64, i1 } %pair, 0
@@ -237,7 +237,7 @@ define i64 @test8(i64* %ptr) {
 ; CHECK-THUMB-NOT: strexd
 ; CHECK-THUMB: dmb {{ish$}}
 
-; CHECK-M: __sync_val_compare_and_swap_8
+; CHECK-M: __atomic_load_8
 
   %r = load atomic i64, i64* %ptr seq_cst, align 8
   ret i64 %r
@@ -263,7 +263,7 @@ define void @test9(i64* %ptr, i64 %val) {
 ; CHECK-THUMB: bne
 ; CHECK-THUMB: dmb {{ish$}}
 
-; CHECK-M: __sync_lock_test_and_set_8
+; CHECK-M: __atomic_store_8
 
   store atomic i64 %val, i64* %ptr seq_cst, align 8
   ret void
@@ -308,7 +308,7 @@ define i64 @test10(i64* %ptr, i64 %val) {
 ; CHECK-THUMB: bne
 ; CHECK-THUMB: dmb {{ish$}}
 
-; CHECK-M: __sync_fetch_and_min_8
+; CHECK-M: __atomic_compare_exchange_8
 
   %r = atomicrmw min i64* %ptr, i64 %val seq_cst
   ret i64 %r
@@ -353,7 +353,7 @@ define i64 @test11(i64* %ptr, i64 %val) {
 ; CHECK-THUMB: bne
 ; CHECK-THUMB: dmb {{ish$}}
 
-; CHECK-M: __sync_fetch_and_umin_8
+; CHECK-M: __atomic_compare_exchange_8
 
   %r = atomicrmw umin i64* %ptr, i64 %val seq_cst
   ret i64 %r
@@ -398,7 +398,7 @@ define i64 @test12(i64* %ptr, i64 %val) {
 ; CHECK-THUMB: bne
 ; CHECK-THUMB: dmb {{ish$}}
 
-; CHECK-M: __sync_fetch_and_max_8
+; CHECK-M: __atomic_compare_exchange_8
 
   %r = atomicrmw max i64* %ptr, i64 %val seq_cst
   ret i64 %r
@@ -443,7 +443,7 @@ define i64 @test13(i64* %ptr, i64 %val) {
 ; CHECK-THUMB: bne
 ; CHECK-THUMB: dmb {{ish$}}
 
-; CHECK-M: __sync_fetch_and_umax_8
+; CHECK-M: __atomic_compare_exchange_8
 
   %r = atomicrmw umax i64* %ptr, i64 %val seq_cst
   ret i64 %r

llvm/test/CodeGen/ARM/atomic-load-store.ll

Lines changed: 2 additions & 2 deletions
@@ -94,14 +94,14 @@ define void @test4(i8* %ptr1, i8* %ptr2) {
 
 define i64 @test_old_load_64bit(i64* %p) {
 ; ARMV4-LABEL: test_old_load_64bit
-; ARMV4: ___sync_val_compare_and_swap_8
+; ARMV4: ___atomic_load_8
   %1 = load atomic i64, i64* %p seq_cst, align 8
   ret i64 %1
 }
 
 define void @test_old_store_64bit(i64* %p, i64 %v) {
 ; ARMV4-LABEL: test_old_store_64bit
-; ARMV4: ___sync_lock_test_and_set_8
+; ARMV4: ___atomic_store_8
   store atomic i64 %v, i64* %p seq_cst, align 8
   ret void
 }
