AArch64/ARM/PPC/X86: Add some atomic tests #92933

Conversation
FP-typed atomic load/store coverage was mostly missing, especially for half and bfloat.
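For orientation, a minimal standalone test of the kind being added might look like the sketch below; the RUN line and the exact CHECK line are illustrative assumptions rather than lines copied from the patch:

; RUN: llc -mtriple=aarch64 -verify-machineinstrs %s -o - | FileCheck %s

; A 2-byte-aligned monotonic atomic load of half is selected as a plain
; ldrh into a GPR; the bits are then moved into an FP register for the
; return value.
define half @load_atomic_f16_monotonic(ptr %ptr) {
; CHECK-LABEL: load_atomic_f16_monotonic:
; CHECK: ldrh w8, [x0]
  %r = load atomic half, ptr %ptr monotonic, align 2
  ret half %r
}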
@llvm/pr-subscribers-backend-aarch64 @llvm/pr-subscribers-backend-powerpc

Author: Matt Arsenault (arsenm)

Changes: FP-typed atomic load/store coverage was mostly missing, especially for half and bfloat.

Patch is 35.23 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/92933.diff

5 Files Affected:
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-load-lse2.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-load-lse2.ll
index e7e231bc344d9..3732d4feb0c67 100644
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-load-lse2.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-load-lse2.ll
@@ -566,6 +566,119 @@ define dso_local i128 @load_atomic_i128_unaligned_seq_cst_const(ptr readonly %pt
%r = load atomic i128, ptr %ptr seq_cst, align 1
ret i128 %r
}
+
+define dso_local half @load_atomic_f16_aligned_unordered(ptr %ptr) {
+; CHECK-LABEL: load_atomic_f16_aligned_unordered:
+; CHECK: ldrh w8, [x0]
+ %r = load atomic half, ptr %ptr unordered, align 2
+ ret half %r
+}
+
+define dso_local half @load_atomic_f16_aligned_unordered_const(ptr readonly %ptr) {
+; CHECK-LABEL: load_atomic_f16_aligned_unordered_const:
+; CHECK: ldrh w8, [x0]
+ %r = load atomic half, ptr %ptr unordered, align 2
+ ret half %r
+}
+
+define dso_local half @load_atomic_f16_aligned_monotonic(ptr %ptr) {
+; CHECK-LABEL: load_atomic_f16_aligned_monotonic:
+; CHECK: ldrh w8, [x0]
+ %r = load atomic half, ptr %ptr monotonic, align 2
+ ret half %r
+}
+
+define dso_local half @load_atomic_f16_aligned_monotonic_const(ptr readonly %ptr) {
+; CHECK-LABEL: load_atomic_f16_aligned_monotonic_const:
+; CHECK: ldrh w8, [x0]
+ %r = load atomic half, ptr %ptr monotonic, align 2
+ ret half %r
+}
+
+define dso_local half @load_atomic_f16_aligned_acquire(ptr %ptr) {
+; CHECK-LABEL: load_atomic_f16_aligned_acquire:
+; CHECK: ldarh w8, [x0]
+ %r = load atomic half, ptr %ptr acquire, align 2
+ ret half %r
+}
+
+define dso_local half @load_atomic_f16_aligned_acquire_const(ptr readonly %ptr) {
+; CHECK-LABEL: load_atomic_f16_aligned_acquire_const:
+; CHECK: ldarh w8, [x0]
+ %r = load atomic half, ptr %ptr acquire, align 2
+ ret half %r
+}
+
+define dso_local half @load_atomic_f16_aligned_seq_cst(ptr %ptr) {
+; CHECK-LABEL: load_atomic_f16_aligned_seq_cst:
+; CHECK: ldarh w8, [x0]
+ %r = load atomic half, ptr %ptr seq_cst, align 2
+ ret half %r
+}
+
+define dso_local half @load_atomic_f16_aligned_seq_cst_const(ptr readonly %ptr) {
+; CHECK-LABEL: load_atomic_f16_aligned_seq_cst_const:
+; CHECK: ldarh w8, [x0]
+ %r = load atomic half, ptr %ptr seq_cst, align 2
+ ret half %r
+}
+
+define dso_local bfloat @load_atomic_bf16_aligned_unordered(ptr %ptr) {
+; CHECK-LABEL: load_atomic_bf16_aligned_unordered:
+; CHECK: ldrh w8, [x0]
+ %r = load atomic bfloat, ptr %ptr unordered, align 2
+ ret bfloat %r
+}
+
+define dso_local bfloat @load_atomic_bf16_aligned_unordered_const(ptr readonly %ptr) {
+; CHECK-LABEL: load_atomic_bf16_aligned_unordered_const:
+; CHECK: ldrh w8, [x0]
+ %r = load atomic bfloat, ptr %ptr unordered, align 2
+ ret bfloat %r
+}
+
+define dso_local bfloat @load_atomic_bf16_aligned_monotonic(ptr %ptr) {
+; CHECK-LABEL: load_atomic_bf16_aligned_monotonic:
+; CHECK: ldrh w8, [x0]
+ %r = load atomic bfloat, ptr %ptr monotonic, align 2
+ ret bfloat %r
+}
+
+define dso_local bfloat @load_atomic_bf16_aligned_monotonic_const(ptr readonly %ptr) {
+; CHECK-LABEL: load_atomic_bf16_aligned_monotonic_const:
+; CHECK: ldrh w8, [x0]
+ %r = load atomic bfloat, ptr %ptr monotonic, align 2
+ ret bfloat %r
+}
+
+define dso_local bfloat @load_atomic_bf16_aligned_acquire(ptr %ptr) {
+; CHECK-LABEL: load_atomic_bf16_aligned_acquire:
+; CHECK: ldarh w8, [x0]
+ %r = load atomic bfloat, ptr %ptr acquire, align 2
+ ret bfloat %r
+}
+
+define dso_local bfloat @load_atomic_bf16_aligned_acquire_const(ptr readonly %ptr) {
+; CHECK-LABEL: load_atomic_bf16_aligned_acquire_const:
+; CHECK: ldarh w8, [x0]
+ %r = load atomic bfloat, ptr %ptr acquire, align 2
+ ret bfloat %r
+}
+
+define dso_local bfloat @load_atomic_bf16_aligned_seq_cst(ptr %ptr) {
+; CHECK-LABEL: load_atomic_bf16_aligned_seq_cst:
+; CHECK: ldarh w8, [x0]
+ %r = load atomic bfloat, ptr %ptr seq_cst, align 2
+ ret bfloat %r
+}
+
+define dso_local bfloat @load_atomic_bf16_aligned_seq_cst_const(ptr readonly %ptr) {
+; CHECK-LABEL: load_atomic_bf16_aligned_seq_cst_const:
+; CHECK: ldarh w8, [x0]
+ %r = load atomic bfloat, ptr %ptr seq_cst, align 2
+ ret bfloat %r
+}
+
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; -O0: {{.*}}
; -O1: {{.*}}
diff --git a/llvm/test/CodeGen/AArch64/relaxed-fp-atomics.ll b/llvm/test/CodeGen/AArch64/relaxed-fp-atomics.ll
index 95abbb6979be8..af664549a472a 100644
--- a/llvm/test/CodeGen/AArch64/relaxed-fp-atomics.ll
+++ b/llvm/test/CodeGen/AArch64/relaxed-fp-atomics.ll
@@ -91,4 +91,94 @@ define void @atomic_store_relaxed_f64(ptr %p, i32 %off32, i64 %off64, double %va
ret void
}
+define half @atomic_load_relaxed_f16(ptr %p, i32 %off32, i64 %off64) #0 {
+; CHECK-LABEL: atomic_load_relaxed_f16:
+ %ptr_unsigned = getelementptr half, ptr %p, i32 4095
+ %val_unsigned = load atomic half, ptr %ptr_unsigned monotonic, align 4
+; CHECK: ldrh {{w[0-9]+}}, [x0, #8190]
+
+ %ptr_regoff = getelementptr half, ptr %p, i32 %off32
+ %val_regoff = load atomic half, ptr %ptr_regoff unordered, align 4
+ %tot1 = fadd half %val_unsigned, %val_regoff
+; CHECK: ldrh {{w[0-9]+}}, [x0, w1, sxtw #1]
+
+ %ptr_regoff64 = getelementptr half, ptr %p, i64 %off64
+ %val_regoff64 = load atomic half, ptr %ptr_regoff64 monotonic, align 4
+ %tot2 = fadd half %tot1, %val_regoff64
+; CHECK: ldrh {{w[0-9]+}}, [x0, x2, lsl #1]
+
+ %ptr_unscaled = getelementptr half, ptr %p, i32 -64
+ %val_unscaled = load atomic half, ptr %ptr_unscaled unordered, align 4
+ %tot3 = fadd half %tot2, %val_unscaled
+; CHECK: ldurh {{w[0-9]+}}, [x0, #-128]
+
+ ret half %tot3
+}
+
+define bfloat @atomic_load_relaxed_bf16(ptr %p, i32 %off32, i64 %off64) #0 {
+; CHECK-LABEL: atomic_load_relaxed_bf16:
+ %ptr_unsigned = getelementptr bfloat, ptr %p, i32 4095
+ %val_unsigned = load atomic bfloat, ptr %ptr_unsigned monotonic, align 4
+; CHECK: ldrh {{w[0-9]+}}, [x0, #8190]
+
+ %ptr_regoff = getelementptr bfloat, ptr %p, i32 %off32
+ %val_regoff = load atomic bfloat, ptr %ptr_regoff unordered, align 4
+ %tot1 = fadd bfloat %val_unsigned, %val_regoff
+; CHECK: ldrh {{w[0-9]+}}, [x0, w1, sxtw #1]
+
+ %ptr_regoff64 = getelementptr bfloat, ptr %p, i64 %off64
+ %val_regoff64 = load atomic bfloat, ptr %ptr_regoff64 monotonic, align 4
+ %tot2 = fadd bfloat %tot1, %val_regoff64
+; CHECK: ldrh {{w[0-9]+}}, [x0, x2, lsl #1]
+
+ %ptr_unscaled = getelementptr bfloat, ptr %p, i32 -64
+ %val_unscaled = load atomic bfloat, ptr %ptr_unscaled unordered, align 4
+ %tot3 = fadd bfloat %tot2, %val_unscaled
+; CHECK: ldurh {{w[0-9]+}}, [x0, #-128]
+
+ ret bfloat %tot3
+}
+
+define void @atomic_store_relaxed_f16(ptr %p, i32 %off32, i64 %off64, half %val) #0 {
+; CHECK-LABEL: atomic_store_relaxed_f16:
+ %ptr_unsigned = getelementptr half, ptr %p, i32 4095
+ store atomic half %val, ptr %ptr_unsigned monotonic, align 4
+; CHECK: strh {{w[0-9]+}}, [x0, #8190]
+
+ %ptr_regoff = getelementptr half, ptr %p, i32 %off32
+ store atomic half %val, ptr %ptr_regoff unordered, align 4
+; CHECK: strh {{w[0-9]+}}, [x0, w1, sxtw #1]
+
+ %ptr_regoff64 = getelementptr half, ptr %p, i64 %off64
+ store atomic half %val, ptr %ptr_regoff64 monotonic, align 4
+; CHECK: strh {{w[0-9]+}}, [x0, x2, lsl #1]
+
+ %ptr_unscaled = getelementptr half, ptr %p, i32 -64
+ store atomic half %val, ptr %ptr_unscaled unordered, align 4
+; CHECK: sturh {{w[0-9]+}}, [x0, #-128]
+
+ ret void
+}
+
+define void @atomic_store_relaxed_bf16(ptr %p, i32 %off32, i64 %off64, bfloat %val) #0 {
+; CHECK-LABEL: atomic_store_relaxed_bf16:
+ %ptr_unsigned = getelementptr bfloat, ptr %p, i32 4095
+ store atomic bfloat %val, ptr %ptr_unsigned monotonic, align 4
+; CHECK: strh {{w[0-9]+}}, [x0, #8190]
+
+ %ptr_regoff = getelementptr bfloat, ptr %p, i32 %off32
+ store atomic bfloat %val, ptr %ptr_regoff unordered, align 4
+; CHECK: strh {{w[0-9]+}}, [x0, w1, sxtw #1]
+
+ %ptr_regoff64 = getelementptr bfloat, ptr %p, i64 %off64
+ store atomic bfloat %val, ptr %ptr_regoff64 monotonic, align 4
+; CHECK: strh {{w[0-9]+}}, [x0, x2, lsl #1]
+
+ %ptr_unscaled = getelementptr bfloat, ptr %p, i32 -64
+ store atomic bfloat %val, ptr %ptr_unscaled unordered, align 4
+; CHECK: sturh {{w[0-9]+}}, [x0, #-128]
+
+ ret void
+}
+
attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/ARM/atomic-load-store.ll b/llvm/test/CodeGen/ARM/atomic-load-store.ll
index 4f2e63b5f2467..c53fb2f330a79 100644
--- a/llvm/test/CodeGen/ARM/atomic-load-store.ll
+++ b/llvm/test/CodeGen/ARM/atomic-load-store.ll
@@ -439,3 +439,539 @@ define void @test_old_store_64bit(ptr %p, i64 %v) {
store atomic i64 %v, ptr %p seq_cst, align 8
ret void
}
+
+define half @load_atomic_f16__seq_cst(ptr %ptr) {
+; ARM-LABEL: load_atomic_f16__seq_cst:
+; ARM: @ %bb.0:
+; ARM-NEXT: ldrh r0, [r0]
+; ARM-NEXT: dmb ish
+; ARM-NEXT: bx lr
+;
+; ARMOPTNONE-LABEL: load_atomic_f16__seq_cst:
+; ARMOPTNONE: @ %bb.0:
+; ARMOPTNONE-NEXT: ldrh r0, [r0]
+; ARMOPTNONE-NEXT: dmb ish
+; ARMOPTNONE-NEXT: bx lr
+;
+; THUMBTWO-LABEL: load_atomic_f16__seq_cst:
+; THUMBTWO: @ %bb.0:
+; THUMBTWO-NEXT: ldrh r0, [r0]
+; THUMBTWO-NEXT: dmb ish
+; THUMBTWO-NEXT: bx lr
+;
+; THUMBONE-LABEL: load_atomic_f16__seq_cst:
+; THUMBONE: @ %bb.0:
+; THUMBONE-NEXT: push {r7, lr}
+; THUMBONE-NEXT: movs r1, #0
+; THUMBONE-NEXT: mov r2, r1
+; THUMBONE-NEXT: bl __sync_val_compare_and_swap_2
+; THUMBONE-NEXT: pop {r7, pc}
+;
+; ARMV4-LABEL: load_atomic_f16__seq_cst:
+; ARMV4: @ %bb.0:
+; ARMV4-NEXT: push {r11, lr}
+; ARMV4-NEXT: mov r1, #5
+; ARMV4-NEXT: bl __atomic_load_2
+; ARMV4-NEXT: pop {r11, lr}
+; ARMV4-NEXT: mov pc, lr
+;
+; ARMV6-LABEL: load_atomic_f16__seq_cst:
+; ARMV6: @ %bb.0:
+; ARMV6-NEXT: ldrh r0, [r0]
+; ARMV6-NEXT: mov r1, #0
+; ARMV6-NEXT: mcr p15, #0, r1, c7, c10, #5
+; ARMV6-NEXT: bx lr
+;
+; THUMBM-LABEL: load_atomic_f16__seq_cst:
+; THUMBM: @ %bb.0:
+; THUMBM-NEXT: ldrh r0, [r0]
+; THUMBM-NEXT: dmb sy
+; THUMBM-NEXT: bx lr
+ %val = load atomic half, ptr %ptr seq_cst, align 2
+ ret half %val
+}
+
+define bfloat @load_atomic_bf16__seq_cst(ptr %ptr) {
+; ARM-LABEL: load_atomic_bf16__seq_cst:
+; ARM: @ %bb.0:
+; ARM-NEXT: ldrh r0, [r0]
+; ARM-NEXT: dmb ish
+; ARM-NEXT: bx lr
+;
+; ARMOPTNONE-LABEL: load_atomic_bf16__seq_cst:
+; ARMOPTNONE: @ %bb.0:
+; ARMOPTNONE-NEXT: ldrh r0, [r0]
+; ARMOPTNONE-NEXT: dmb ish
+; ARMOPTNONE-NEXT: bx lr
+;
+; THUMBTWO-LABEL: load_atomic_bf16__seq_cst:
+; THUMBTWO: @ %bb.0:
+; THUMBTWO-NEXT: ldrh r0, [r0]
+; THUMBTWO-NEXT: dmb ish
+; THUMBTWO-NEXT: bx lr
+;
+; THUMBONE-LABEL: load_atomic_bf16__seq_cst:
+; THUMBONE: @ %bb.0:
+; THUMBONE-NEXT: push {r7, lr}
+; THUMBONE-NEXT: movs r1, #0
+; THUMBONE-NEXT: mov r2, r1
+; THUMBONE-NEXT: bl __sync_val_compare_and_swap_2
+; THUMBONE-NEXT: pop {r7, pc}
+;
+; ARMV4-LABEL: load_atomic_bf16__seq_cst:
+; ARMV4: @ %bb.0:
+; ARMV4-NEXT: push {r11, lr}
+; ARMV4-NEXT: mov r1, #5
+; ARMV4-NEXT: bl __atomic_load_2
+; ARMV4-NEXT: pop {r11, lr}
+; ARMV4-NEXT: mov pc, lr
+;
+; ARMV6-LABEL: load_atomic_bf16__seq_cst:
+; ARMV6: @ %bb.0:
+; ARMV6-NEXT: ldrh r0, [r0]
+; ARMV6-NEXT: mov r1, #0
+; ARMV6-NEXT: mcr p15, #0, r1, c7, c10, #5
+; ARMV6-NEXT: bx lr
+;
+; THUMBM-LABEL: load_atomic_bf16__seq_cst:
+; THUMBM: @ %bb.0:
+; THUMBM-NEXT: ldrh r0, [r0]
+; THUMBM-NEXT: dmb sy
+; THUMBM-NEXT: bx lr
+ %val = load atomic bfloat, ptr %ptr seq_cst, align 2
+ ret bfloat %val
+}
+
+define float @load_atomic_f32__seq_cst(ptr %ptr) {
+; ARM-LABEL: load_atomic_f32__seq_cst:
+; ARM: @ %bb.0:
+; ARM-NEXT: ldr r0, [r0]
+; ARM-NEXT: dmb ish
+; ARM-NEXT: bx lr
+;
+; ARMOPTNONE-LABEL: load_atomic_f32__seq_cst:
+; ARMOPTNONE: @ %bb.0:
+; ARMOPTNONE-NEXT: ldr r0, [r0]
+; ARMOPTNONE-NEXT: dmb ish
+; ARMOPTNONE-NEXT: vmov s0, r0
+; ARMOPTNONE-NEXT: bx lr
+;
+; THUMBTWO-LABEL: load_atomic_f32__seq_cst:
+; THUMBTWO: @ %bb.0:
+; THUMBTWO-NEXT: ldr r0, [r0]
+; THUMBTWO-NEXT: dmb ish
+; THUMBTWO-NEXT: bx lr
+;
+; THUMBONE-LABEL: load_atomic_f32__seq_cst:
+; THUMBONE: @ %bb.0:
+; THUMBONE-NEXT: push {r7, lr}
+; THUMBONE-NEXT: movs r1, #0
+; THUMBONE-NEXT: mov r2, r1
+; THUMBONE-NEXT: bl __sync_val_compare_and_swap_4
+; THUMBONE-NEXT: pop {r7, pc}
+;
+; ARMV4-LABEL: load_atomic_f32__seq_cst:
+; ARMV4: @ %bb.0:
+; ARMV4-NEXT: push {r11, lr}
+; ARMV4-NEXT: mov r1, #5
+; ARMV4-NEXT: bl __atomic_load_4
+; ARMV4-NEXT: pop {r11, lr}
+; ARMV4-NEXT: mov pc, lr
+;
+; ARMV6-LABEL: load_atomic_f32__seq_cst:
+; ARMV6: @ %bb.0:
+; ARMV6-NEXT: ldr r0, [r0]
+; ARMV6-NEXT: mov r1, #0
+; ARMV6-NEXT: mcr p15, #0, r1, c7, c10, #5
+; ARMV6-NEXT: bx lr
+;
+; THUMBM-LABEL: load_atomic_f32__seq_cst:
+; THUMBM: @ %bb.0:
+; THUMBM-NEXT: ldr r0, [r0]
+; THUMBM-NEXT: dmb sy
+; THUMBM-NEXT: bx lr
+ %val = load atomic float, ptr %ptr seq_cst, align 4
+ ret float %val
+}
+
+define double @load_atomic_f64__seq_cst(ptr %ptr) {
+; ARM-LABEL: load_atomic_f64__seq_cst:
+; ARM: @ %bb.0:
+; ARM-NEXT: ldrexd r0, r1, [r0]
+; ARM-NEXT: clrex
+; ARM-NEXT: dmb ish
+; ARM-NEXT: bx lr
+;
+; ARMOPTNONE-LABEL: load_atomic_f64__seq_cst:
+; ARMOPTNONE: @ %bb.0:
+; ARMOPTNONE-NEXT: ldrexd r2, r3, [r0]
+; ARMOPTNONE-NEXT: mov r1, r3
+; ARMOPTNONE-NEXT: mov r0, r2
+; ARMOPTNONE-NEXT: clrex
+; ARMOPTNONE-NEXT: dmb ish
+; ARMOPTNONE-NEXT: vmov d16, r0, r1
+; ARMOPTNONE-NEXT: bx lr
+;
+; THUMBTWO-LABEL: load_atomic_f64__seq_cst:
+; THUMBTWO: @ %bb.0:
+; THUMBTWO-NEXT: ldrexd r0, r1, [r0]
+; THUMBTWO-NEXT: clrex
+; THUMBTWO-NEXT: dmb ish
+; THUMBTWO-NEXT: bx lr
+;
+; THUMBONE-LABEL: load_atomic_f64__seq_cst:
+; THUMBONE: @ %bb.0:
+; THUMBONE-NEXT: push {r7, lr}
+; THUMBONE-NEXT: sub sp, #8
+; THUMBONE-NEXT: movs r2, #0
+; THUMBONE-NEXT: str r2, [sp]
+; THUMBONE-NEXT: str r2, [sp, #4]
+; THUMBONE-NEXT: mov r3, r2
+; THUMBONE-NEXT: bl __sync_val_compare_and_swap_8
+; THUMBONE-NEXT: add sp, #8
+; THUMBONE-NEXT: pop {r7, pc}
+;
+; ARMV4-LABEL: load_atomic_f64__seq_cst:
+; ARMV4: @ %bb.0:
+; ARMV4-NEXT: push {r11, lr}
+; ARMV4-NEXT: mov r1, #5
+; ARMV4-NEXT: bl __atomic_load_8
+; ARMV4-NEXT: pop {r11, lr}
+; ARMV4-NEXT: mov pc, lr
+;
+; ARMV6-LABEL: load_atomic_f64__seq_cst:
+; ARMV6: @ %bb.0:
+; ARMV6-NEXT: ldrexd r0, r1, [r0]
+; ARMV6-NEXT: mov r2, #0
+; ARMV6-NEXT: mcr p15, #0, r2, c7, c10, #5
+; ARMV6-NEXT: bx lr
+;
+; THUMBM-LABEL: load_atomic_f64__seq_cst:
+; THUMBM: @ %bb.0:
+; THUMBM-NEXT: push {r7, lr}
+; THUMBM-NEXT: movs r1, #5
+; THUMBM-NEXT: bl __atomic_load_8
+; THUMBM-NEXT: pop {r7, pc}
+ %val = load atomic double, ptr %ptr seq_cst, align 8
+ ret double %val
+}
+
+define void @store_atomic_f16__seq_cst(ptr %ptr, half %val1) {
+; ARM-LABEL: store_atomic_f16__seq_cst:
+; ARM: @ %bb.0:
+; ARM-NEXT: dmb ish
+; ARM-NEXT: strh r1, [r0]
+; ARM-NEXT: dmb ish
+; ARM-NEXT: bx lr
+;
+; ARMOPTNONE-LABEL: store_atomic_f16__seq_cst:
+; ARMOPTNONE: @ %bb.0:
+; ARMOPTNONE-NEXT: sub sp, sp, #4
+; ARMOPTNONE-NEXT: str r1, [sp] @ 4-byte Spill
+; ARMOPTNONE-NEXT: mov r1, r0
+; ARMOPTNONE-NEXT: ldr r0, [sp] @ 4-byte Reload
+; ARMOPTNONE-NEXT: vmov s0, r0
+; ARMOPTNONE-NEXT: vmov r0, s0
+; ARMOPTNONE-NEXT: dmb ish
+; ARMOPTNONE-NEXT: strh r0, [r1]
+; ARMOPTNONE-NEXT: dmb ish
+; ARMOPTNONE-NEXT: add sp, sp, #4
+; ARMOPTNONE-NEXT: bx lr
+;
+; THUMBTWO-LABEL: store_atomic_f16__seq_cst:
+; THUMBTWO: @ %bb.0:
+; THUMBTWO-NEXT: dmb ish
+; THUMBTWO-NEXT: strh r1, [r0]
+; THUMBTWO-NEXT: dmb ish
+; THUMBTWO-NEXT: bx lr
+;
+; THUMBONE-LABEL: store_atomic_f16__seq_cst:
+; THUMBONE: @ %bb.0:
+; THUMBONE-NEXT: push {r7, lr}
+; THUMBONE-NEXT: bl __sync_lock_test_and_set_2
+; THUMBONE-NEXT: pop {r7, pc}
+;
+; ARMV4-LABEL: store_atomic_f16__seq_cst:
+; ARMV4: @ %bb.0:
+; ARMV4-NEXT: push {r11, lr}
+; ARMV4-NEXT: mov r2, #5
+; ARMV4-NEXT: bl __atomic_store_2
+; ARMV4-NEXT: pop {r11, lr}
+; ARMV4-NEXT: mov pc, lr
+;
+; ARMV6-LABEL: store_atomic_f16__seq_cst:
+; ARMV6: @ %bb.0:
+; ARMV6-NEXT: mov r2, #0
+; ARMV6-NEXT: mcr p15, #0, r2, c7, c10, #5
+; ARMV6-NEXT: strh r1, [r0]
+; ARMV6-NEXT: mcr p15, #0, r2, c7, c10, #5
+; ARMV6-NEXT: bx lr
+;
+; THUMBM-LABEL: store_atomic_f16__seq_cst:
+; THUMBM: @ %bb.0:
+; THUMBM-NEXT: dmb sy
+; THUMBM-NEXT: strh r1, [r0]
+; THUMBM-NEXT: dmb sy
+; THUMBM-NEXT: bx lr
+ store atomic half %val1, ptr %ptr seq_cst, align 2
+ ret void
+}
+
+define void @store_atomic_bf16__seq_cst(ptr %ptr, bfloat %val1) {
+; ARM-LABEL: store_atomic_bf16__seq_cst:
+; ARM: @ %bb.0:
+; ARM-NEXT: dmb ish
+; ARM-NEXT: strh r1, [r0]
+; ARM-NEXT: dmb ish
+; ARM-NEXT: bx lr
+;
+; ARMOPTNONE-LABEL: store_atomic_bf16__seq_cst:
+; ARMOPTNONE: @ %bb.0:
+; ARMOPTNONE-NEXT: sub sp, sp, #4
+; ARMOPTNONE-NEXT: str r1, [sp] @ 4-byte Spill
+; ARMOPTNONE-NEXT: mov r1, r0
+; ARMOPTNONE-NEXT: ldr r0, [sp] @ 4-byte Reload
+; ARMOPTNONE-NEXT: vmov s0, r0
+; ARMOPTNONE-NEXT: vmov r0, s0
+; ARMOPTNONE-NEXT: dmb ish
+; ARMOPTNONE-NEXT: strh r0, [r1]
+; ARMOPTNONE-NEXT: dmb ish
+; ARMOPTNONE-NEXT: add sp, sp, #4
+; ARMOPTNONE-NEXT: bx lr
+;
+; THUMBTWO-LABEL: store_atomic_bf16__seq_cst:
+; THUMBTWO: @ %bb.0:
+; THUMBTWO-NEXT: dmb ish
+; THUMBTWO-NEXT: strh r1, [r0]
+; THUMBTWO-NEXT: dmb ish
+; THUMBTWO-NEXT: bx lr
+;
+; THUMBONE-LABEL: store_atomic_bf16__seq_cst:
+; THUMBONE: @ %bb.0:
+; THUMBONE-NEXT: push {r7, lr}
+; THUMBONE-NEXT: bl __sync_lock_test_and_set_2
+; THUMBONE-NEXT: pop {r7, pc}
+;
+; ARMV4-LABEL: store_atomic_bf16__seq_cst:
+; ARMV4: @ %bb.0:
+; ARMV4-NEXT: push {r11, lr}
+; ARMV4-NEXT: mov r2, #5
+; ARMV4-NEXT: bl __atomic_store_2
+; ARMV4-NEXT: pop {r11, lr}
+; ARMV4-NEXT: mov pc, lr
+;
+; ARMV6-LABEL: store_atomic_bf16__seq_cst:
+; ARMV6: @ %bb.0:
+; ARMV6-NEXT: mov r2, #0
+; ARMV6-NEXT: mcr p15, #0, r2, c7, c10, #5
+; ARMV6-NEXT: strh r1, [r0]
+; ARMV6-NEXT: mcr p15, #0, r2, c7, c10, #5
+; ARMV6-NEXT: bx lr
+;
+; THUMBM-LABEL: store_atomic_bf16__seq_cst:
+; THUMBM: @ %bb.0:
+; THUMBM-NEXT: dmb sy
+; THUMBM-NEXT: strh r1, [r0]
+; THUMBM-NEXT: dmb sy
+; THUMBM-NEXT: bx lr
+ store atomic bfloat %val1, ptr %ptr seq_cst, align 2
+ ret void
+}
+
+define void @store_atomic_f32__seq_cst(ptr %ptr, float %val1) {
+; ARM-LABEL: store_atomic_f32__seq_cst:
+; ARM: @ %bb.0:
+; ARM-NEXT: dmb ish
+; ARM-NEXT: str r1, [r0]
+; ARM-NEXT: dmb ish
+; ARM-NEXT: bx lr
+;
+; ARMOPTNONE-LABEL: store_atomic_f32__seq_cst:
+; ARMOPTNONE: @ %bb.0:
+; ARMOPTNONE-NEXT: sub sp, sp, #4
+; ARMOPTNONE-NEXT: str r1, [sp] @ 4-byte Spill
+; ARMOPTNONE-NEXT: mov r1, r0
+; ARMOPTNONE-NEXT: ldr r0, [sp] @ 4-byte Reload
+; ARMOPTNONE-NEXT: vmov s0, r0
+; ARMOPTNONE-NEXT: vmov r0, s0
+; ARMOPTNONE-NEXT: dmb ish
+; ARMOPTNONE-NEXT: str r0, [r1]
+; ARMOPTNONE-NEXT: dmb ish
+; ARMOPTNONE-NEXT: add sp, sp, #4
+; ARMOPTNONE-NEXT: bx lr
+;
+; THUMBTWO-LABEL: store_atomic_f32__seq_cst:
+; THUMBTWO: @ %bb.0:
+; THUMBTWO-NEXT: dmb ish
+; THUMBTWO-NEXT: str r1, [r0]
+; THUMBTWO-NEXT: dmb ish
+; THUMBTWO-NEXT: bx lr
+;
+; THUMBONE-LABEL: store_atomic_f32__seq_cst:
+; THUMBONE: @ %bb.0:
+; THUMBONE-NEXT: push {r7, lr}
+; THUMBONE-NEXT: bl __sync_lock_test_and_set_4
+; THUMBONE-NEXT: pop {r7, pc}
+;
+; ARMV4-LABEL: store_atomic_f32__seq_cst:
+; ARMV4: @ %bb.0:
+; ARMV4-NEXT: push {r11, lr}
+; ARMV4-NEXT: mov r2, #5
+; ARMV4-NEXT: bl __atomic_store_4
+; ARMV4-NEXT: pop {r11, lr}
+; ARMV4-NEXT: mov pc, lr
+;
+; ARMV6-LABEL: store_atomic_f32__seq_cst:
+; ARMV6: @ %bb.0:
+; ARMV6-NEXT: mov r2, #0
+; ARMV6-NEXT: mcr p15, #0, r2, c7, c10, #5
+; ARMV6-NEXT: str r1, [r0]
+; ARMV6-NEXT: mcr p15, #0, r2, c7, c10, #5
+; ARMV6-NEXT: bx lr
+;
+; THUMBM-LABEL: store_atomic_f32__seq_cst:
+; THUMBM: ...
[truncated]
  ret void
}

; Work around issue #92899 by casting to float
Maybe you can put it in a separate file and remove the SSE1 test.
I'm not putting in that effort; instead of fracturing tests, it's better to just fix the backend.
I don't think it is a problem, or it is not fixable if you prefer to call it a problem.
As a comparison, exchange_half does generate code under SSE1, but the code is invalid: you cannot successfully call an external library that uses a different calling convention, or mutually link with a CU built with SSE2 and above.
In my opinion, spending effort on invalid usage is unwise compared to splitting the test case.
It's fixable, and it's not even hard to fix. You can always just bitcast to i16, just like half is already handled.
An "external function" compiled the same way in the IR will work. The lack of a defined C ABI is not relevant to getting the IR into a working state. You can always implement something.
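A minimal sketch of that bitcast approach, written out at the IR level (the real change would live in backend legalization, and these function names are invented for illustration):

define bfloat @load_atomic_bf16_via_i16(ptr %ptr) {
  ; do the atomic access on the raw 16 bits
  %bits = load atomic i16, ptr %ptr seq_cst, align 2
  ; reinterpret them as bfloat; no value conversion is involved
  %val = bitcast i16 %bits to bfloat
  ret bfloat %val
}

define void @store_atomic_bf16_via_i16(ptr %ptr, bfloat %val) {
  %bits = bitcast bfloat %val to i16
  store atomic i16 %bits, ptr %ptr seq_cst, align 2
  ret void
}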
That cannot be called a fix. It's not a simple "external function" problem, but one involving fundamental functions like __truncsfhf2 and __extendhfsf2 that are used by almost all operations. They are provided in the library with a standard ABI; you cannot make a meaningful application work without them.
half has its own story: it was supported as i16 prior to the ABI definition. The fact that we leave it unchanged for the ABI-undefined case doesn't mean we have to duplicate that for a new type.
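For context, this is roughly what the promotion through those helpers looks like when written out by hand in IR. The declarations below assume half is passed and returned directly, which is exactly the ABI question being debated here, so treat this as an illustrative sketch rather than the actual lowering:

declare float @__extendhfsf2(half)
declare half @__truncsfhf2(float)

define half @fadd_half_promoted(half %a, half %b) {
  ; extend both operands to float, do the arithmetic there,
  ; then truncate the result back to half
  %af = call float @__extendhfsf2(half %a)
  %bf = call float @__extendhfsf2(half %b)
  %rf = fadd float %af, %bf
  %r = call half @__truncsfhf2(float %rf)
  ret half %r
}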
#93146 fixes this. Whether or not compiler-rt is wired up correctly to use the same IR type is a separate problem, but the IR has a clear, handleable meaning.
@@ -462,3 +462,212 @@ define i64 @and_i64_release(ptr %mem, i64 %operand) {
  %val = atomicrmw and ptr %mem, i64 %operand release
  ret i64 %val
}

define half @load_atomic_f16__seq_cst(ptr %ptr) {
; PPC32-LABEL: load_atomic_f16__seq_cst:
@bzEq Hi, would you please help to check these atomic cases on PPC? Thank you!