@@ -241,7 +241,7 @@ static pmd_t *pti_user_pagetable_walk_pmd(unsigned long address)
241
241
*
242
242
* Returns a pointer to a PTE on success, or NULL on failure.
243
243
*/
244
- static pte_t * pti_user_pagetable_walk_pte (unsigned long address )
244
+ static pte_t * pti_user_pagetable_walk_pte (unsigned long address , bool late_text )
245
245
{
246
246
gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO );
247
247
pmd_t * pmd ;
@@ -251,10 +251,15 @@ static pte_t *pti_user_pagetable_walk_pte(unsigned long address)
251
251
if (!pmd )
252
252
return NULL ;
253
253
254
- /* We can't do anything sensible if we hit a large mapping. */
254
+ /* Large PMD mapping found */
255
255
if (pmd_leaf (* pmd )) {
256
- WARN_ON (1 );
257
- return NULL ;
256
+ /* Clear the PMD if we hit a large mapping from the first round */
257
+ if (late_text ) {
258
+ set_pmd (pmd , __pmd (0 ));
259
+ } else {
260
+ WARN_ON_ONCE (1 );
261
+ return NULL ;
262
+ }
258
263
}
259
264
260
265
if (pmd_none (* pmd )) {
@@ -283,7 +288,7 @@ static void __init pti_setup_vsyscall(void)
283
288
if (!pte || WARN_ON (level != PG_LEVEL_4K ) || pte_none (* pte ))
284
289
return ;
285
290
286
- target_pte = pti_user_pagetable_walk_pte (VSYSCALL_ADDR );
291
+ target_pte = pti_user_pagetable_walk_pte (VSYSCALL_ADDR , false );
287
292
if (WARN_ON (!target_pte ))
288
293
return ;
289
294
@@ -301,7 +306,7 @@ enum pti_clone_level {
301
306
302
307
static void
303
308
pti_clone_pgtable (unsigned long start , unsigned long end ,
304
- enum pti_clone_level level )
309
+ enum pti_clone_level level , bool late_text )
305
310
{
306
311
unsigned long addr ;
307
312
@@ -390,7 +395,7 @@ pti_clone_pgtable(unsigned long start, unsigned long end,
390
395
return ;
391
396
392
397
/* Allocate PTE in the user page-table */
393
- target_pte = pti_user_pagetable_walk_pte (addr );
398
+ target_pte = pti_user_pagetable_walk_pte (addr , late_text );
394
399
if (WARN_ON (!target_pte ))
395
400
return ;
396
401
@@ -452,7 +457,7 @@ static void __init pti_clone_user_shared(void)
452
457
phys_addr_t pa = per_cpu_ptr_to_phys ((void * )va );
453
458
pte_t * target_pte ;
454
459
455
- target_pte = pti_user_pagetable_walk_pte (va );
460
+ target_pte = pti_user_pagetable_walk_pte (va , false );
456
461
if (WARN_ON (!target_pte ))
457
462
return ;
458
463
@@ -475,7 +480,7 @@ static void __init pti_clone_user_shared(void)
475
480
start = CPU_ENTRY_AREA_BASE ;
476
481
end = start + (PAGE_SIZE * CPU_ENTRY_AREA_PAGES );
477
482
478
- pti_clone_pgtable (start , end , PTI_CLONE_PMD );
483
+ pti_clone_pgtable (start , end , PTI_CLONE_PMD , false );
479
484
}
480
485
#endif /* CONFIG_X86_64 */
481
486
@@ -492,11 +497,11 @@ static void __init pti_setup_espfix64(void)
492
497
/*
493
498
* Clone the populated PMDs of the entry text and force it RO.
*
* @late is passed through unchanged as the final argument of
* pti_clone_pgtable(), which in turn hands it to
* pti_user_pagetable_walk_pte() as late_text. There, when the walk hits
* a large (leaf) PMD: with late_text true the PMD is cleared and the walk
* carries on to its pmd_none() check; with late_text false it warns once
* and returns NULL.
*
* pti_init() calls this with false (early boot); pti_finalize() calls it
* with true.
494
499
*/
495
- static void pti_clone_entry_text (void )
500
+ static void pti_clone_entry_text (bool late )
496
501
{
497
502
pti_clone_pgtable ((unsigned long ) __entry_text_start ,
498
503
(unsigned long ) __entry_text_end ,
499
- PTI_LEVEL_KERNEL_IMAGE );
504
+ PTI_LEVEL_KERNEL_IMAGE , late );
500
505
}
501
506
502
507
/*
@@ -571,7 +576,7 @@ static void pti_clone_kernel_text(void)
571
576
* pti_set_kernel_image_nonglobal() did to clear the
572
577
* global bit.
573
578
*/
574
- pti_clone_pgtable (start , end_clone , PTI_LEVEL_KERNEL_IMAGE );
579
+ pti_clone_pgtable (start , end_clone , PTI_LEVEL_KERNEL_IMAGE , false );
575
580
576
581
/*
577
582
* pti_clone_pgtable() will set the global bit in any PMDs
@@ -638,8 +643,15 @@ void __init pti_init(void)
638
643
639
644
/* Undo all global bits from the init pagetables in head_64.S: */
640
645
pti_set_kernel_image_nonglobal ();
646
+
641
647
/* Replace some of the global bits just for shared entry text: */
642
- pti_clone_entry_text ();
648
+ /*
649
+ * This is very early in boot. Device and Late initcalls can do
650
+ * modprobe before free_initmem() and mark_readonly(). This
651
+ * pti_clone_entry_text() allows those user-mode-helpers to function,
652
+ * but notably the text is still RW.
653
+ */
654
+ pti_clone_entry_text (false);
643
655
pti_setup_espfix64 ();
644
656
pti_setup_vsyscall ();
645
657
}
@@ -656,10 +668,11 @@ void pti_finalize(void)
656
668
if (!boot_cpu_has (X86_FEATURE_PTI ))
657
669
return ;
658
670
/*
659
- * We need to clone everything (again) that maps parts of the
660
- * kernel image.
671
+ * This is after free_initmem() (all initcalls are done) and we've done
672
+ * mark_readonly(). Text is now NX which might've split some PMDs
673
+ * relative to the early clone.
661
674
*/
662
- pti_clone_entry_text ();
675
+ pti_clone_entry_text (true );
663
676
pti_clone_kernel_text ();
664
677
665
678
debug_checkwx_user ();
0 commit comments