
Commit 8e7f37f

mm: Add mem_dump_obj() to print source of memory block
There are kernel facilities such as per-CPU reference counts that give
error messages in generic handlers or callbacks, whose messages are
unenlightening. In the case of per-CPU reference-count underflow, this is
not a problem when creating a new use of this facility because in that
case the bug is almost certainly in the code implementing that new use.
However, trouble arises when deploying across many systems, which might
exercise corner cases that were not seen during development and testing.
Here, it would be really nice to get some kind of hint as to which of
several uses the underflow was caused by.

This commit therefore exposes a mem_dump_obj() function that takes
a pointer to memory (which must still be allocated if it has been
dynamically allocated) and prints available information on where that
memory came from. This pointer can reference the middle of the block as
well as the beginning of the block, as needed by things like RCU callback
functions and timer handlers that might not know where the beginning of
the memory block is. These functions and handlers can use mem_dump_obj()
to print out better hints as to where the problem might lie.

The information printed can depend on kernel configuration. For example,
the allocation return address can be printed only for slab and slub, and
even then only when the necessary debug has been enabled. For slab, build
with CONFIG_DEBUG_SLAB=y, and either use sizes with ample space to the
next power of two or pass the SLAB_STORE_USER flag when creating the
kmem_cache structure. For slub, build with CONFIG_SLUB_DEBUG=y and boot
with slub_debug=U, or pass SLAB_STORE_USER to kmem_cache_create() if more
focused use is desired. Also for slub, use CONFIG_STACKTRACE to enable
printing of the allocation-time stack trace.

Cc: Christoph Lameter <[email protected]>
Cc: Pekka Enberg <[email protected]>
Cc: David Rientjes <[email protected]>
Cc: Joonsoo Kim <[email protected]>
Cc: Andrew Morton <[email protected]>
Cc: <[email protected]>
Reported-by: Andrii Nakryiko <[email protected]>
[ paulmck: Convert to printing and change names per Joonsoo Kim. ]
[ paulmck: Move slab definition per Stephen Rothwell and kbuild test robot. ]
[ paulmck: Handle CONFIG_MMU=n case where vmalloc() is kmalloc(). ]
[ paulmck: Apply Vlastimil Babka feedback on slab.c kmem_provenance(). ]
[ paulmck: Extract more info from !SLUB_DEBUG per Joonsoo Kim. ]
[ paulmck: Explicitly check for small pointers per Naresh Kamboju. ]
Acked-by: Joonsoo Kim <[email protected]>
Acked-by: Vlastimil Babka <[email protected]>
Tested-by: Naresh Kamboju <[email protected]>
Signed-off-by: Paul E. McKenney <[email protected]>
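To make the intended call pattern concrete, here is a minimal sketch of how a generic handler might use the new interface; the helper function and its message are hypothetical and are not part of this commit:

	/* Hypothetical debug path in a generic callback or handler. */
	static void report_suspect_object(void *obj)
	{
		/* mem_dump_obj() uses pr_cont(), so print the preamble first. */
		pr_alert("reference-count underflow, object:");
		mem_dump_obj(obj);	/* Prints slab name, offsets, and (debug permitting) allocation info. */
	}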
1 parent 5c8fe58 commit 8e7f37f


8 files changed, 181 insertions(+), 0 deletions(-)


include/linux/mm.h

Lines changed: 2 additions & 0 deletions
@@ -3169,5 +3169,7 @@ unsigned long wp_shared_mapping_range(struct address_space *mapping,
 
 extern int sysctl_nr_trim_pages;
 
+void mem_dump_obj(void *object);
+
 #endif /* __KERNEL__ */
 #endif /* _LINUX_MM_H */

include/linux/slab.h

Lines changed: 2 additions & 0 deletions
@@ -186,6 +186,8 @@ void kfree(const void *);
 void kfree_sensitive(const void *);
 size_t __ksize(const void *);
 size_t ksize(const void *);
+bool kmem_valid_obj(void *object);
+void kmem_dump_obj(void *object);
 
 #ifdef CONFIG_HAVE_HARDENED_USERCOPY_ALLOCATOR
 void __check_heap_object(const void *ptr, unsigned long n, struct page *page,

mm/slab.c

Lines changed: 20 additions & 0 deletions
@@ -3635,6 +3635,26 @@ void *__kmalloc_node_track_caller(size_t size, gfp_t flags,
 EXPORT_SYMBOL(__kmalloc_node_track_caller);
 #endif /* CONFIG_NUMA */
 
+void kmem_obj_info(struct kmem_obj_info *kpp, void *object, struct page *page)
+{
+	struct kmem_cache *cachep;
+	unsigned int objnr;
+	void *objp;
+
+	kpp->kp_ptr = object;
+	kpp->kp_page = page;
+	cachep = page->slab_cache;
+	kpp->kp_slab_cache = cachep;
+	objp = object - obj_offset(cachep);
+	kpp->kp_data_offset = obj_offset(cachep);
+	page = virt_to_head_page(objp);
+	objnr = obj_to_index(cachep, page, objp);
+	objp = index_to_obj(cachep, page, objnr);
+	kpp->kp_objp = objp;
+	if (DEBUG && cachep->flags & SLAB_STORE_USER)
+		kpp->kp_ret = *dbg_userword(cachep, objp);
+}
+
 /**
  * __do_kmalloc - allocate memory
  * @size: how many bytes of memory are required.

mm/slab.h

Lines changed: 12 additions & 0 deletions
@@ -615,4 +615,16 @@ static inline bool slab_want_init_on_free(struct kmem_cache *c)
 	return false;
 }
 
+#define KS_ADDRS_COUNT 16
+struct kmem_obj_info {
+	void *kp_ptr;
+	struct page *kp_page;
+	void *kp_objp;
+	unsigned long kp_data_offset;
+	struct kmem_cache *kp_slab_cache;
+	void *kp_ret;
+	void *kp_stack[KS_ADDRS_COUNT];
+};
+void kmem_obj_info(struct kmem_obj_info *kpp, void *object, struct page *page);
+
 #endif /* MM_SLAB_H */

mm/slab_common.c

Lines changed: 75 additions & 0 deletions
@@ -537,6 +537,81 @@ bool slab_is_available(void)
 	return slab_state >= UP;
 }
 
+/**
+ * kmem_valid_obj - does the pointer reference a valid slab object?
+ * @object: pointer to query.
+ *
+ * Return: %true if the pointer is to a not-yet-freed object from
+ * kmalloc() or kmem_cache_alloc(), either %true or %false if the pointer
+ * is to an already-freed object, and %false otherwise.
+ */
+bool kmem_valid_obj(void *object)
+{
+	struct page *page;
+
+	/* Some arches consider ZERO_SIZE_PTR to be a valid address. */
+	if (object < (void *)PAGE_SIZE || !virt_addr_valid(object))
+		return false;
+	page = virt_to_head_page(object);
+	return PageSlab(page);
+}
+
+/**
+ * kmem_dump_obj - Print available slab provenance information
+ * @object: slab object for which to find provenance information.
+ *
+ * This function uses pr_cont(), so that the caller is expected to have
+ * printed out whatever preamble is appropriate.  The provenance information
+ * depends on the type of object and on how much debugging is enabled.
+ * For a slab-cache object, the fact that it is a slab object is printed,
+ * and, if available, the slab name, return address, and stack trace from
+ * the allocation of that object.
+ *
+ * This function will splat if passed a pointer to a non-slab object.
+ * If you are not sure what type of object you have, you should instead
+ * use mem_dump_obj().
+ */
+void kmem_dump_obj(void *object)
+{
+	char *cp = IS_ENABLED(CONFIG_MMU) ? "" : "/vmalloc";
+	int i;
+	struct page *page;
+	unsigned long ptroffset;
+	struct kmem_obj_info kp = { };
+
+	if (WARN_ON_ONCE(!virt_addr_valid(object)))
+		return;
+	page = virt_to_head_page(object);
+	if (WARN_ON_ONCE(!PageSlab(page))) {
+		pr_cont(" non-slab memory.\n");
+		return;
+	}
+	kmem_obj_info(&kp, object, page);
+	if (kp.kp_slab_cache)
+		pr_cont(" slab%s %s", cp, kp.kp_slab_cache->name);
+	else
+		pr_cont(" slab%s", cp);
+	if (kp.kp_objp)
+		pr_cont(" start %px", kp.kp_objp);
+	if (kp.kp_data_offset)
+		pr_cont(" data offset %lu", kp.kp_data_offset);
+	if (kp.kp_objp) {
+		ptroffset = ((char *)object - (char *)kp.kp_objp) - kp.kp_data_offset;
+		pr_cont(" pointer offset %lu", ptroffset);
+	}
+	if (kp.kp_slab_cache && kp.kp_slab_cache->usersize)
+		pr_cont(" size %u", kp.kp_slab_cache->usersize);
+	if (kp.kp_ret)
+		pr_cont(" allocated at %pS\n", kp.kp_ret);
+	else
+		pr_cont("\n");
+	for (i = 0; i < ARRAY_SIZE(kp.kp_stack); i++) {
+		if (!kp.kp_stack[i])
+			break;
+		pr_info(" %pS\n", kp.kp_stack[i]);
+	}
+}
+
 #ifndef CONFIG_SLOB
 /* Create a cache during boot when no slab services are available yet */
 void __init create_boot_cache(struct kmem_cache *s, const char *name,
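As the kmem_dump_obj() kernel-doc above notes, this function splats on non-slab pointers, so callers unsure of a pointer's origin should use mem_dump_obj() instead. A caller that deals only in slab-allocated objects might guard the call with kmem_valid_obj(), roughly as in this sketch (the wrapper name and message are made up for illustration):

	/* Hypothetical wrapper for code that only ever passes slab-allocated objects. */
	static void dump_slab_obj(void *obj)
	{
		if (!kmem_valid_obj(obj))
			return;				/* Not a still-allocated slab object. */
		pr_info("suspect slab object:");	/* Preamble; kmem_dump_obj() continues it via pr_cont(). */
		kmem_dump_obj(obj);
	}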

mm/slob.c

Lines changed: 6 additions & 0 deletions
@@ -461,6 +461,12 @@ static void slob_free(void *block, int size)
 	spin_unlock_irqrestore(&slob_lock, flags);
 }
 
+void kmem_obj_info(struct kmem_obj_info *kpp, void *object, struct page *page)
+{
+	kpp->kp_ptr = object;
+	kpp->kp_page = page;
+}
+
 /*
  * End of slob allocator proper. Begin kmem_cache_alloc and kmalloc frontend.
  */

mm/slub.c

Lines changed: 40 additions & 0 deletions
@@ -3919,6 +3919,46 @@ int __kmem_cache_shutdown(struct kmem_cache *s)
 	return 0;
 }
 
+void kmem_obj_info(struct kmem_obj_info *kpp, void *object, struct page *page)
+{
+	void *base;
+	int __maybe_unused i;
+	unsigned int objnr;
+	void *objp;
+	void *objp0;
+	struct kmem_cache *s = page->slab_cache;
+	struct track __maybe_unused *trackp;
+
+	kpp->kp_ptr = object;
+	kpp->kp_page = page;
+	kpp->kp_slab_cache = s;
+	base = page_address(page);
+	objp0 = kasan_reset_tag(object);
+#ifdef CONFIG_SLUB_DEBUG
+	objp = restore_red_left(s, objp0);
+#else
+	objp = objp0;
+#endif
+	objnr = obj_to_index(s, page, objp);
+	kpp->kp_data_offset = (unsigned long)((char *)objp0 - (char *)objp);
+	objp = base + s->size * objnr;
+	kpp->kp_objp = objp;
+	if (WARN_ON_ONCE(objp < base || objp >= base + page->objects * s->size || (objp - base) % s->size) ||
+	    !(s->flags & SLAB_STORE_USER))
+		return;
+#ifdef CONFIG_SLUB_DEBUG
+	trackp = get_track(s, objp, TRACK_ALLOC);
+	kpp->kp_ret = (void *)trackp->addr;
+#ifdef CONFIG_STACKTRACE
+	for (i = 0; i < KS_ADDRS_COUNT && i < TRACK_ADDRS_COUNT; i++) {
+		kpp->kp_stack[i] = (void *)trackp->addrs[i];
+		if (!kpp->kp_stack[i])
+			break;
+	}
+#endif
+#endif
+}
+
 /********************************************************************
  *		Kmalloc subsystem
  *******************************************************************/

mm/util.c

Lines changed: 24 additions & 0 deletions
@@ -982,3 +982,27 @@ int __weak memcmp_pages(struct page *page1, struct page *page2)
 	kunmap_atomic(addr1);
 	return ret;
 }
+
+/**
+ * mem_dump_obj - Print available provenance information
+ * @object: object for which to find provenance information.
+ *
+ * This function uses pr_cont(), so that the caller is expected to have
+ * printed out whatever preamble is appropriate.  The provenance information
+ * depends on the type of object and on how much debugging is enabled.
+ * For example, for a slab-cache object, the slab name is printed, and,
+ * if available, the return address and stack trace from the allocation
+ * of that object.
+ */
+void mem_dump_obj(void *object)
+{
+	if (!virt_addr_valid(object)) {
+		pr_cont(" non-paged (local) memory.\n");
+		return;
+	}
+	if (kmem_valid_obj(object)) {
+		kmem_dump_obj(object);
+		return;
+	}
+	pr_cont(" non-slab memory.\n");
+}
