Skip to content

Commit 4d931b6

Browse files
committed
[NFC][libc] rearrange aarch64 memset code to better match new implementation
Differential Revision: https://reviews.llvm.org/D132121
1 parent fd874e5 commit 4d931b6

File tree

2 files changed

+12
-8
lines changed

2 files changed

+12
-8
lines changed

libc/src/string/memory_utils/elements_aarch64.h

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -52,8 +52,9 @@ using _4 = __llvm_libc::scalar::_4;
5252
using _32 = Chained<_16, _16>;
5353
using _64 = Chained<_32, _32>;
5454

55-
struct ZVA {
55+
struct Zva64 {
5656
static constexpr size_t SIZE = 64;
57+
5758
static void splat_set(char *dst, const unsigned char) {
5859
#if __SIZEOF_POINTER__ == 4
5960
asm("dc zva, %w[dst]" : : [dst] "r"(dst) : "memory");
@@ -63,13 +64,14 @@ struct ZVA {
6364
}
6465
};
6566

66-
inline static bool AArch64ZVA(char *dst, size_t count) {
67+
inline static bool hasZva() {
6768
uint64_t zva_val;
6869
asm("mrs %[zva_val], dczid_el0" : [zva_val] "=r"(zva_val));
69-
if ((zva_val & 31) != 4)
70-
return false;
71-
splat_set<Align<_64, Arg::_1>::Then<Loop<ZVA, _64>>>(dst, 0, count);
72-
return true;
70+
// DC ZVA is permitted if DZP (bit [4]) is zero.
71+
// BS (bits [3:0]) holds log2 of the block size in words.
72+
// So the next line checks whether the instruction is permitted and block size
73+
// is 16 words (i.e. 64 bytes).
74+
return (zva_val & 0b11111) == 0b00100;
7375
}
7476

7577
} // namespace aarch64_memset

libc/src/string/memory_utils/memset_implementations.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -92,15 +92,17 @@ inline static void inline_memset(char *dst, unsigned char value, size_t count) {
9292
return splat_set<HeadTail<_8>>(dst, value, count);
9393
if (count <= 32)
9494
return splat_set<HeadTail<_16>>(dst, value, count);
95-
if (count <= 96) {
95+
if (count <= (32 + 64)) {
9696
splat_set<_32>(dst, value);
9797
if (count <= 64)
9898
return splat_set<Tail<_32>>(dst, value, count);
9999
splat_set<Skip<32>::Then<_32>>(dst, value);
100100
splat_set<Tail<_32>>(dst, value, count);
101101
return;
102102
}
103-
if (count < 448 || value != 0 || !AArch64ZVA(dst, count))
103+
if (count >= 448 && value == 0 && hasZva())
104+
return splat_set<Align<_64, Arg::_1>::Then<Loop<Zva64>>>(dst, 0, count);
105+
else
104106
return splat_set<Align<_16, Arg::_1>::Then<Loop<_64>>>(dst, value, count);
105107
#else
106108
/////////////////////////////////////////////////////////////////////////////

0 commit comments

Comments
 (0)