Skip to content

Commit d09a8e6

Browse files
committed
Merge branch 'x86-dax-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 dax updates from Ingo Molnar: "This contains x86 memcpy_mcsafe() fault handling improvements the nvdimm tree would like to make more use of" * 'x86-dax-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: x86/asm/memcpy_mcsafe: Define copy_to_iter_mcsafe() x86/asm/memcpy_mcsafe: Add write-protection-fault handling x86/asm/memcpy_mcsafe: Return bytes remaining x86/asm/memcpy_mcsafe: Add labels for __memcpy_mcsafe() write fault handling x86/asm/memcpy_mcsafe: Remove loop unrolling
2 parents 8316385 + 8780356 commit d09a8e6

File tree

10 files changed

+169
-68
lines changed

10 files changed

+169
-68
lines changed

arch/x86/Kconfig

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,7 @@ config X86
6262
select ARCH_HAS_PMEM_API if X86_64
6363
select ARCH_HAS_REFCOUNT
6464
select ARCH_HAS_UACCESS_FLUSHCACHE if X86_64
65+
select ARCH_HAS_UACCESS_MCSAFE if X86_64
6566
select ARCH_HAS_SET_MEMORY
6667
select ARCH_HAS_SG_CHAIN
6768
select ARCH_HAS_STRICT_KERNEL_RWX

arch/x86/include/asm/string_64.h

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -116,7 +116,8 @@ int strcmp(const char *cs, const char *ct);
116116
#endif
117117

118118
#define __HAVE_ARCH_MEMCPY_MCSAFE 1
119-
__must_check int memcpy_mcsafe_unrolled(void *dst, const void *src, size_t cnt);
119+
__must_check unsigned long __memcpy_mcsafe(void *dst, const void *src,
120+
size_t cnt);
120121
DECLARE_STATIC_KEY_FALSE(mcsafe_key);
121122

122123
/**
@@ -131,14 +132,15 @@ DECLARE_STATIC_KEY_FALSE(mcsafe_key);
131132
* actually do machine check recovery. Everyone else can just
132133
* use memcpy().
133134
*
134-
* Return 0 for success, -EFAULT for fail
135+
* Return 0 for success, or number of bytes not copied if there was an
136+
* exception.
135137
*/
136-
static __always_inline __must_check int
138+
static __always_inline __must_check unsigned long
137139
memcpy_mcsafe(void *dst, const void *src, size_t cnt)
138140
{
139141
#ifdef CONFIG_X86_MCE
140142
if (static_branch_unlikely(&mcsafe_key))
141-
return memcpy_mcsafe_unrolled(dst, src, cnt);
143+
return __memcpy_mcsafe(dst, src, cnt);
142144
else
143145
#endif
144146
memcpy(dst, src, cnt);

arch/x86/include/asm/uaccess_64.h

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,17 @@ copy_user_generic(void *to, const void *from, unsigned len)
4646
return ret;
4747
}
4848

49+
static __always_inline __must_check unsigned long
50+
copy_to_user_mcsafe(void *to, const void *from, unsigned len)
51+
{
52+
unsigned long ret;
53+
54+
__uaccess_begin();
55+
ret = memcpy_mcsafe(to, from, len);
56+
__uaccess_end();
57+
return ret;
58+
}
59+
4960
static __always_inline __must_check unsigned long
5061
raw_copy_from_user(void *dst, const void __user *src, unsigned long size)
5162
{
@@ -194,4 +205,7 @@ __copy_from_user_flushcache(void *dst, const void __user *src, unsigned size)
194205
unsigned long
195206
copy_user_handle_tail(char *to, char *from, unsigned len);
196207

208+
unsigned long
209+
mcsafe_handle_tail(char *to, char *from, unsigned len);
210+
197211
#endif /* _ASM_X86_UACCESS_64_H */

arch/x86/lib/memcpy_64.S

Lines changed: 44 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -184,11 +184,11 @@ ENDPROC(memcpy_orig)
184184

185185
#ifndef CONFIG_UML
186186
/*
187-
* memcpy_mcsafe_unrolled - memory copy with machine check exception handling
187+
* __memcpy_mcsafe - memory copy with machine check exception handling
188188
* Note that we only catch machine checks when reading the source addresses.
189189
* Writes to target are posted and don't generate machine checks.
190190
*/
191-
ENTRY(memcpy_mcsafe_unrolled)
191+
ENTRY(__memcpy_mcsafe)
192192
cmpl $8, %edx
193193
/* Less than 8 bytes? Go to byte copy loop */
194194
jb .L_no_whole_words
@@ -204,58 +204,29 @@ ENTRY(memcpy_mcsafe_unrolled)
204204
subl $8, %ecx
205205
negl %ecx
206206
subl %ecx, %edx
207-
.L_copy_leading_bytes:
207+
.L_read_leading_bytes:
208208
movb (%rsi), %al
209+
.L_write_leading_bytes:
209210
movb %al, (%rdi)
210211
incq %rsi
211212
incq %rdi
212213
decl %ecx
213-
jnz .L_copy_leading_bytes
214+
jnz .L_read_leading_bytes
214215

215216
.L_8byte_aligned:
216-
/* Figure out how many whole cache lines (64-bytes) to copy */
217-
movl %edx, %ecx
218-
andl $63, %edx
219-
shrl $6, %ecx
220-
jz .L_no_whole_cache_lines
221-
222-
/* Loop copying whole cache lines */
223-
.L_cache_w0: movq (%rsi), %r8
224-
.L_cache_w1: movq 1*8(%rsi), %r9
225-
.L_cache_w2: movq 2*8(%rsi), %r10
226-
.L_cache_w3: movq 3*8(%rsi), %r11
227-
movq %r8, (%rdi)
228-
movq %r9, 1*8(%rdi)
229-
movq %r10, 2*8(%rdi)
230-
movq %r11, 3*8(%rdi)
231-
.L_cache_w4: movq 4*8(%rsi), %r8
232-
.L_cache_w5: movq 5*8(%rsi), %r9
233-
.L_cache_w6: movq 6*8(%rsi), %r10
234-
.L_cache_w7: movq 7*8(%rsi), %r11
235-
movq %r8, 4*8(%rdi)
236-
movq %r9, 5*8(%rdi)
237-
movq %r10, 6*8(%rdi)
238-
movq %r11, 7*8(%rdi)
239-
leaq 64(%rsi), %rsi
240-
leaq 64(%rdi), %rdi
241-
decl %ecx
242-
jnz .L_cache_w0
243-
244-
/* Are there any trailing 8-byte words? */
245-
.L_no_whole_cache_lines:
246217
movl %edx, %ecx
247218
andl $7, %edx
248219
shrl $3, %ecx
249220
jz .L_no_whole_words
250221

251-
/* Copy trailing words */
252-
.L_copy_trailing_words:
222+
.L_read_words:
253223
movq (%rsi), %r8
254-
mov %r8, (%rdi)
255-
leaq 8(%rsi), %rsi
256-
leaq 8(%rdi), %rdi
224+
.L_write_words:
225+
movq %r8, (%rdi)
226+
addq $8, %rsi
227+
addq $8, %rdi
257228
decl %ecx
258-
jnz .L_copy_trailing_words
229+
jnz .L_read_words
259230

260231
/* Any trailing bytes? */
261232
.L_no_whole_words:
@@ -264,38 +235,53 @@ ENTRY(memcpy_mcsafe_unrolled)
264235

265236
/* Copy trailing bytes */
266237
movl %edx, %ecx
267-
.L_copy_trailing_bytes:
238+
.L_read_trailing_bytes:
268239
movb (%rsi), %al
240+
.L_write_trailing_bytes:
269241
movb %al, (%rdi)
270242
incq %rsi
271243
incq %rdi
272244
decl %ecx
273-
jnz .L_copy_trailing_bytes
245+
jnz .L_read_trailing_bytes
274246

275247
/* Copy successful. Return zero */
276248
.L_done_memcpy_trap:
277249
xorq %rax, %rax
278250
ret
279-
ENDPROC(memcpy_mcsafe_unrolled)
280-
EXPORT_SYMBOL_GPL(memcpy_mcsafe_unrolled)
251+
ENDPROC(__memcpy_mcsafe)
252+
EXPORT_SYMBOL_GPL(__memcpy_mcsafe)
281253

282254
.section .fixup, "ax"
283-
/* Return -EFAULT for any failure */
284-
.L_memcpy_mcsafe_fail:
285-
mov $-EFAULT, %rax
255+
/*
256+
* Return number of bytes not copied for any failure. Note that
257+
* there is no "tail" handling since the source buffer is 8-byte
258+
* aligned and poison is cacheline aligned.
259+
*/
260+
.E_read_words:
261+
shll $3, %ecx
262+
.E_leading_bytes:
263+
addl %edx, %ecx
264+
.E_trailing_bytes:
265+
mov %ecx, %eax
286266
ret
287267

268+
/*
269+
* For write fault handling, given the destination is unaligned,
270+
* we handle faults on multi-byte writes with a byte-by-byte
271+
* copy up to the write-protected page.
272+
*/
273+
.E_write_words:
274+
shll $3, %ecx
275+
addl %edx, %ecx
276+
movl %ecx, %edx
277+
jmp mcsafe_handle_tail
278+
288279
.previous
289280

290-
_ASM_EXTABLE_FAULT(.L_copy_leading_bytes, .L_memcpy_mcsafe_fail)
291-
_ASM_EXTABLE_FAULT(.L_cache_w0, .L_memcpy_mcsafe_fail)
292-
_ASM_EXTABLE_FAULT(.L_cache_w1, .L_memcpy_mcsafe_fail)
293-
_ASM_EXTABLE_FAULT(.L_cache_w2, .L_memcpy_mcsafe_fail)
294-
_ASM_EXTABLE_FAULT(.L_cache_w3, .L_memcpy_mcsafe_fail)
295-
_ASM_EXTABLE_FAULT(.L_cache_w4, .L_memcpy_mcsafe_fail)
296-
_ASM_EXTABLE_FAULT(.L_cache_w5, .L_memcpy_mcsafe_fail)
297-
_ASM_EXTABLE_FAULT(.L_cache_w6, .L_memcpy_mcsafe_fail)
298-
_ASM_EXTABLE_FAULT(.L_cache_w7, .L_memcpy_mcsafe_fail)
299-
_ASM_EXTABLE_FAULT(.L_copy_trailing_words, .L_memcpy_mcsafe_fail)
300-
_ASM_EXTABLE_FAULT(.L_copy_trailing_bytes, .L_memcpy_mcsafe_fail)
281+
_ASM_EXTABLE_FAULT(.L_read_leading_bytes, .E_leading_bytes)
282+
_ASM_EXTABLE_FAULT(.L_read_words, .E_read_words)
283+
_ASM_EXTABLE_FAULT(.L_read_trailing_bytes, .E_trailing_bytes)
284+
_ASM_EXTABLE(.L_write_leading_bytes, .E_leading_bytes)
285+
_ASM_EXTABLE(.L_write_words, .E_write_words)
286+
_ASM_EXTABLE(.L_write_trailing_bytes, .E_trailing_bytes)
301287
#endif

arch/x86/lib/usercopy_64.c

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,27 @@ copy_user_handle_tail(char *to, char *from, unsigned len)
7474
return len;
7575
}
7676

77+
/*
78+
* Similar to copy_user_handle_tail, probe for the write fault point,
79+
* but reuse __memcpy_mcsafe in case a new read error is encountered.
80+
* clac() is handled in _copy_to_iter_mcsafe().
81+
*/
82+
__visible unsigned long
83+
mcsafe_handle_tail(char *to, char *from, unsigned len)
84+
{
85+
for (; len; --len, to++, from++) {
86+
/*
87+
* Call the assembly routine back directly since
88+
* memcpy_mcsafe() may silently fall back to memcpy.
89+
*/
90+
unsigned long rem = __memcpy_mcsafe(to, from, 1);
91+
92+
if (rem)
93+
break;
94+
}
95+
return len;
96+
}
97+
7798
#ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE
7899
/**
79100
* clean_cache_range - write back a cache range with CLWB

drivers/nvdimm/claim.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -276,7 +276,8 @@ static int nsio_rw_bytes(struct nd_namespace_common *ndns,
276276
if (rw == READ) {
277277
if (unlikely(is_bad_pmem(&nsio->bb, sector, sz_align)))
278278
return -EIO;
279-
return memcpy_mcsafe(buf, nsio->addr + offset, size);
279+
if (memcpy_mcsafe(buf, nsio->addr + offset, size) != 0)
280+
return -EIO;
280281
}
281282

282283
if (unlikely(is_bad_pmem(&nsio->bb, sector, sz_align))) {

drivers/nvdimm/pmem.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -101,15 +101,15 @@ static blk_status_t read_pmem(struct page *page, unsigned int off,
101101
void *pmem_addr, unsigned int len)
102102
{
103103
unsigned int chunk;
104-
int rc;
104+
unsigned long rem;
105105
void *mem;
106106

107107
while (len) {
108108
mem = kmap_atomic(page);
109109
chunk = min_t(unsigned int, len, PAGE_SIZE);
110-
rc = memcpy_mcsafe(mem + off, pmem_addr, chunk);
110+
rem = memcpy_mcsafe(mem + off, pmem_addr, chunk);
111111
kunmap_atomic(mem);
112-
if (rc)
112+
if (rem)
113113
return BLK_STS_IOERR;
114114
len -= chunk;
115115
off = 0;

include/linux/string.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -147,8 +147,8 @@ extern int memcmp(const void *,const void *,__kernel_size_t);
147147
extern void * memchr(const void *,int,__kernel_size_t);
148148
#endif
149149
#ifndef __HAVE_ARCH_MEMCPY_MCSAFE
150-
static inline __must_check int memcpy_mcsafe(void *dst, const void *src,
151-
size_t cnt)
150+
static inline __must_check unsigned long memcpy_mcsafe(void *dst,
151+
const void *src, size_t cnt)
152152
{
153153
memcpy(dst, src, cnt);
154154
return 0;

include/linux/uio.h

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,12 @@ size_t _copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i);
154154
#define _copy_from_iter_flushcache _copy_from_iter_nocache
155155
#endif
156156

157+
#ifdef CONFIG_ARCH_HAS_UACCESS_MCSAFE
158+
size_t _copy_to_iter_mcsafe(void *addr, size_t bytes, struct iov_iter *i);
159+
#else
160+
#define _copy_to_iter_mcsafe _copy_to_iter
161+
#endif
162+
157163
static __always_inline __must_check
158164
size_t copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i)
159165
{
@@ -163,6 +169,15 @@ size_t copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i)
163169
return _copy_from_iter_flushcache(addr, bytes, i);
164170
}
165171

172+
static __always_inline __must_check
173+
size_t copy_to_iter_mcsafe(void *addr, size_t bytes, struct iov_iter *i)
174+
{
175+
if (unlikely(!check_copy_size(addr, bytes, false)))
176+
return 0;
177+
else
178+
return _copy_to_iter_mcsafe(addr, bytes, i);
179+
}
180+
166181
size_t iov_iter_zero(size_t bytes, struct iov_iter *);
167182
unsigned long iov_iter_alignment(const struct iov_iter *i);
168183
unsigned long iov_iter_gap_alignment(const struct iov_iter *i);

lib/iov_iter.c

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -573,6 +573,67 @@ size_t _copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
573573
}
574574
EXPORT_SYMBOL(_copy_to_iter);
575575

576+
#ifdef CONFIG_ARCH_HAS_UACCESS_MCSAFE
577+
static int copyout_mcsafe(void __user *to, const void *from, size_t n)
578+
{
579+
if (access_ok(VERIFY_WRITE, to, n)) {
580+
kasan_check_read(from, n);
581+
n = copy_to_user_mcsafe((__force void *) to, from, n);
582+
}
583+
return n;
584+
}
585+
586+
static unsigned long memcpy_mcsafe_to_page(struct page *page, size_t offset,
587+
const char *from, size_t len)
588+
{
589+
unsigned long ret;
590+
char *to;
591+
592+
to = kmap_atomic(page);
593+
ret = memcpy_mcsafe(to + offset, from, len);
594+
kunmap_atomic(to);
595+
596+
return ret;
597+
}
598+
599+
size_t _copy_to_iter_mcsafe(const void *addr, size_t bytes, struct iov_iter *i)
600+
{
601+
const char *from = addr;
602+
unsigned long rem, curr_addr, s_addr = (unsigned long) addr;
603+
604+
if (unlikely(i->type & ITER_PIPE)) {
605+
WARN_ON(1);
606+
return 0;
607+
}
608+
if (iter_is_iovec(i))
609+
might_fault();
610+
iterate_and_advance(i, bytes, v,
611+
copyout_mcsafe(v.iov_base, (from += v.iov_len) - v.iov_len, v.iov_len),
612+
({
613+
rem = memcpy_mcsafe_to_page(v.bv_page, v.bv_offset,
614+
(from += v.bv_len) - v.bv_len, v.bv_len);
615+
if (rem) {
616+
curr_addr = (unsigned long) from;
617+
bytes = curr_addr - s_addr - rem;
618+
return bytes;
619+
}
620+
}),
621+
({
622+
rem = memcpy_mcsafe(v.iov_base, (from += v.iov_len) - v.iov_len,
623+
v.iov_len);
624+
if (rem) {
625+
curr_addr = (unsigned long) from;
626+
bytes = curr_addr - s_addr - rem;
627+
return bytes;
628+
}
629+
})
630+
)
631+
632+
return bytes;
633+
}
634+
EXPORT_SYMBOL_GPL(_copy_to_iter_mcsafe);
635+
#endif /* CONFIG_ARCH_HAS_UACCESS_MCSAFE */
636+
576637
size_t _copy_from_iter(void *addr, size_t bytes, struct iov_iter *i)
577638
{
578639
char *to = addr;

0 commit comments

Comments
 (0)