
Commit e6f81a9

kvaneesh authored and mpe committed
powerpc/mm/hash: Support 68 bit VA
In order to support a large effective address range (512TB), we want to increase the virtual address bits to 68. But we do have platforms like p4 and p5 that can only do 65 bit VA. We support those platforms by limiting context bits on them to 16.

The protovsid -> vsid conversion is verified to work with both 65 and 68 bit VA values. I also documented the restrictions in a table format as part of code comments.

Signed-off-by: Aneesh Kumar K.V <[email protected]>
Signed-off-by: Michael Ellerman <[email protected]>
1 parent 85beb1c commit e6f81a9
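
The core of the change is the protovsid -> vsid conversion: multiply by a 24-bit prime, then reduce modulo 2^vsid_bits - 1 using shifts and adds instead of a division, where vsid_bits now depends on whether the platform supports 65- or 68-bit VA. Below is a minimal user-space sketch of that conversion, assuming a 64-bit unsigned long (as on ppc64); the multiplier is the patch's constant, while this vsid_scramble() signature, main() and the test loop are illustrative only:

/*
 * Sketch (not kernel code): model of the protovsid -> vsid scramble
 * introduced in this commit, checked against a plain '%' reduction.
 */
#include <assert.h>
#include <stdio.h>

#define VSID_MULTIPLIER	12538073UL	/* 24-bit prime, as in the patch */

static unsigned long vsid_scramble(unsigned long protovsid, int vsid_bits)
{
	unsigned long modulus = (1UL << vsid_bits) - 1;
	unsigned long vsid = protovsid * VSID_MULTIPLIER;

	/* fold the high bits back in: computes x mod (2^n - 1) */
	vsid = (vsid >> vsid_bits) + (vsid & modulus);
	return (vsid + ((vsid + 1) >> vsid_bits)) & modulus;
}

int main(void)
{
	/* 256M segments: 37 proto-VSID bits on 65-bit VA, 40 on 68-bit VA */
	for (unsigned long p = 1; p < (1UL << 20); p++) {
		assert(vsid_scramble(p, 37) == (p * VSID_MULTIPLIER) % ((1UL << 37) - 1));
		assert(vsid_scramble(p, 40) == (p * VSID_MULTIPLIER) % ((1UL << 40) - 1));
	}
	printf("scramble matches %% for both 65- and 68-bit VA widths\n");
	return 0;
}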

File tree

5 files changed: +152 −65 lines changed

arch/powerpc/include/asm/book3s/64/mmu-hash.h

Lines changed: 79 additions & 48 deletions
@@ -39,6 +39,7 @@
 
 /* Bits in the SLB VSID word */
 #define SLB_VSID_SHIFT		12
+#define SLB_VSID_SHIFT_256M	SLB_VSID_SHIFT
 #define SLB_VSID_SHIFT_1T	24
 #define SLB_VSID_SSIZE_SHIFT	62
 #define SLB_VSID_B		ASM_CONST(0xc000000000000000)
@@ -521,9 +522,19 @@ extern void slb_set_size(u16 size);
  * because of the modulo operation in vsid scramble.
  */
 
+/*
+ * Max Va bits we support as of now is 68 bits. We want 19 bit
+ * context ID.
+ * Restrictions:
+ * GPU has restrictions of not able to access beyond 128TB
+ * (47 bit effective address). We also cannot do more than 20bit PID.
+ * For p4 and p5 which can only do 65 bit VA, we restrict our CONTEXT_BITS
+ * to 16 bits (ie, we can only have 2^16 pids at the same time).
+ */
+#define VA_BITS			68
 #define CONTEXT_BITS		19
-#define ESID_BITS		18
-#define ESID_BITS_1T		6
+#define ESID_BITS		(VA_BITS - (SID_SHIFT + CONTEXT_BITS))
+#define ESID_BITS_1T		(VA_BITS - (SID_SHIFT_1T + CONTEXT_BITS))
 
 #define ESID_BITS_MASK		((1 << ESID_BITS) - 1)
 #define ESID_BITS_1T_MASK	((1 << ESID_BITS_1T) - 1)
@@ -533,7 +544,7 @@ extern void slb_set_size(u16 size);
  * The proto-VSID space has 2^(CONTEX_BITS + ESID_BITS) - 1 segments
  * available for user + kernel mapping. VSID 0 is reserved as invalid, contexts
  * 1-4 are used for kernel mapping. Each segment contains 2^28 bytes. Each
- * context maps 2^46 bytes (64TB).
+ * context maps 2^49 bytes (512TB).
  *
  * We also need to avoid the last segment of the last context, because that
  * would give a protovsid of 0x1fffffffff. That will result in a VSID 0
@@ -545,54 +556,46 @@ extern void slb_set_size(u16 size);
 /* Would be nice to use KERNEL_REGION_ID here */
 #define KERNEL_REGION_CONTEXT_OFFSET	(0xc - 1)
 
+/*
+ * For platforms that support on 65bit VA we limit the context bits
+ */
+#define MAX_USER_CONTEXT_65BIT_VA ((ASM_CONST(1) << (65 - (SID_SHIFT + ESID_BITS))) - 2)
+
 /*
  * This should be computed such that protovosid * vsid_mulitplier
- * doesn't overflow 64 bits. It should also be co-prime to vsid_modulus
+ * doesn't overflow 64 bits. The vsid_mutliplier should also be
+ * co-prime to vsid_modulus. We also need to make sure that number
+ * of bits in multiplied result (dividend) is less than twice the number of
+ * protovsid bits for our modulus optmization to work.
+ *
+ * The below table shows the current values used.
+ * |-------+------------+----------------------+------------+-------------------|
+ * |       | Prime Bits | proto VSID_BITS_65VA | Total Bits | 2* prot VSID_BITS |
+ * |-------+------------+----------------------+------------+-------------------|
+ * | 1T    |         24 |                   25 |         49 |                50 |
+ * |-------+------------+----------------------+------------+-------------------|
+ * | 256MB |         24 |                   37 |         61 |                74 |
+ * |-------+------------+----------------------+------------+-------------------|
+ *
+ * |-------+------------+----------------------+------------+--------------------|
+ * |       | Prime Bits | proto VSID_BITS_68VA | Total Bits | 2* proto VSID_BITS |
+ * |-------+------------+----------------------+------------+--------------------|
+ * | 1T    |         24 |                   28 |         52 |                 56 |
+ * |-------+------------+----------------------+------------+--------------------|
+ * | 256MB |         24 |                   40 |         64 |                 80 |
+ * |-------+------------+----------------------+------------+--------------------|
+ *
  */
 #define VSID_MULTIPLIER_256M	ASM_CONST(12538073)	/* 24-bit prime */
-#define VSID_BITS_256M		(CONTEXT_BITS + ESID_BITS)
-#define VSID_MODULUS_256M	((1UL<<VSID_BITS_256M)-1)
+#define VSID_BITS_256M		(VA_BITS - SID_SHIFT)
+#define VSID_BITS_65_256M	(65 - SID_SHIFT)
 
 #define VSID_MULTIPLIER_1T	ASM_CONST(12538073)	/* 24-bit prime */
-#define VSID_BITS_1T		(CONTEXT_BITS + ESID_BITS_1T)
-#define VSID_MODULUS_1T		((1UL<<VSID_BITS_1T)-1)
-
+#define VSID_BITS_1T		(VA_BITS - SID_SHIFT_1T)
+#define VSID_BITS_65_1T		(65 - SID_SHIFT_1T)
 
 #define USER_VSID_RANGE	(1UL << (ESID_BITS + SID_SHIFT))
 
-/*
- * This macro generates asm code to compute the VSID scramble
- * function.  Used in slb_allocate() and do_stab_bolted.  The function
- * computed is: (protovsid*VSID_MULTIPLIER) % VSID_MODULUS
- *
- *	rt = register containing the proto-VSID and into which the
- *	     VSID will be stored
- *	rx = scratch register (clobbered)
- *
- *	- rt and rx must be different registers
- *	- The answer will end up in the low VSID_BITS bits of rt.  The higher
- *	  bits may contain other garbage, so you may need to mask the
- *	  result.
- */
-#define ASM_VSID_SCRAMBLE(rt, rx, size)					\
-	lis	rx,VSID_MULTIPLIER_##size@h;				\
-	ori	rx,rx,VSID_MULTIPLIER_##size@l;				\
-	mulld	rt,rt,rx;		/* rt = rt * MULTIPLIER */	\
-									\
-	srdi	rx,rt,VSID_BITS_##size;					\
-	clrldi	rt,rt,(64-VSID_BITS_##size);				\
-	add	rt,rt,rx;		/* add high and low bits */	\
-	/* NOTE: explanation based on VSID_BITS_##size = 36		\
-	 * Now, r3 == VSID (mod 2^36-1), and lies between 0 and		\
-	 * 2^36-1+2^28-1.  That in particular means that if r3 >=	\
-	 * 2^36-1, then r3+1 has the 2^36 bit set.  So, if r3+1 has	\
-	 * the bit clear, r3 already has the answer we want, if it	\
-	 * doesn't, the answer is the low 36 bits of r3+1.  So in all	\
-	 * cases the answer is the low 36 bits of (r3 + ((r3+1) >> 36))*/\
-	addi	rx,rt,1;						\
-	srdi	rx,rx,VSID_BITS_##size;	/* extract 2^VSID_BITS bit */	\
-	add	rt,rt,rx
-
 /* 4 bits per slice and we have one slice per 1TB */
 #define SLICE_ARRAY_SIZE	(H_PGTABLE_RANGE >> 41)
 
@@ -640,14 +643,29 @@ static inline void subpage_prot_init_new_context(struct mm_struct *mm) { }
 #define vsid_scramble(protovsid, size) \
 	((((protovsid) * VSID_MULTIPLIER_##size) % VSID_MODULUS_##size))
 
-#else /* 1 */
+/* simplified form avoiding mod operation */
 #define vsid_scramble(protovsid, size) \
 	({								 \
 		unsigned long x;					 \
 		x = (protovsid) * VSID_MULTIPLIER_##size;		 \
 		x = (x >> VSID_BITS_##size) + (x & VSID_MODULUS_##size); \
 		(x + ((x+1) >> VSID_BITS_##size)) & VSID_MODULUS_##size; \
 	})
+
+#else /* 1 */
+static inline unsigned long vsid_scramble(unsigned long protovsid,
+				  unsigned long vsid_multiplier, int vsid_bits)
+{
+	unsigned long vsid;
+	unsigned long vsid_modulus = ((1UL << vsid_bits) - 1);
+	/*
+	 * We have same multipler for both 256 and 1T segements now
+	 */
+	vsid = protovsid * vsid_multiplier;
+	vsid = (vsid >> vsid_bits) + (vsid & vsid_modulus);
+	return (vsid + ((vsid + 1) >> vsid_bits)) & vsid_modulus;
+}
+
 #endif /* 1 */
 
 /* Returns the segment size indicator for a user address */
@@ -662,17 +680,30 @@ static inline int user_segment_size(unsigned long addr)
 static inline unsigned long get_vsid(unsigned long context, unsigned long ea,
 				     int ssize)
 {
+	unsigned long va_bits = VA_BITS;
+	unsigned long vsid_bits;
+	unsigned long protovsid;
+
 	/*
 	 * Bad address. We return VSID 0 for that
 	 */
 	if ((ea & ~REGION_MASK) >= H_PGTABLE_RANGE)
 		return 0;
 
-	if (ssize == MMU_SEGSIZE_256M)
-		return vsid_scramble((context << ESID_BITS)
-				     | ((ea >> SID_SHIFT) & ESID_BITS_MASK), 256M);
-	return vsid_scramble((context << ESID_BITS_1T)
-			     | ((ea >> SID_SHIFT_1T) & ESID_BITS_1T_MASK), 1T);
+	if (!mmu_has_feature(MMU_FTR_68_BIT_VA))
+		va_bits = 65;
+
+	if (ssize == MMU_SEGSIZE_256M) {
+		vsid_bits = va_bits - SID_SHIFT;
+		protovsid = (context << ESID_BITS) |
+			((ea >> SID_SHIFT) & ESID_BITS_MASK);
+		return vsid_scramble(protovsid, VSID_MULTIPLIER_256M, vsid_bits);
+	}
+	/* 1T segment */
+	vsid_bits = va_bits - SID_SHIFT_1T;
+	protovsid = (context << ESID_BITS_1T) |
+		((ea >> SID_SHIFT_1T) & ESID_BITS_1T_MASK);
+	return vsid_scramble(protovsid, VSID_MULTIPLIER_1T, vsid_bits);
 }
 
 /*
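
The tables in the comment above can be re-derived directly: the proto-VSID width is va_bits - SID_SHIFT (256M segments) or va_bits - SID_SHIFT_1T (1T segments), and "Total Bits" adds the 24 multiplier bits. A small stand-alone sketch, assuming the kernel's SID_SHIFT = 28 and SID_SHIFT_1T = 40 (both defined outside this diff):

/* Sketch: re-derive the comment tables and check the "total bits <
 * 2 * proto-VSID bits" condition the modulus optimization relies on. */
#include <stdio.h>

#define SID_SHIFT	28	/* 256M segment */
#define SID_SHIFT_1T	40	/* 1T segment */
#define PRIME_BITS	24	/* bits in VSID_MULTIPLIER (12538073) */

int main(void)
{
	for (int va_bits = 65; va_bits <= 68; va_bits += 3) {
		int proto_256m = va_bits - SID_SHIFT;
		int proto_1t = va_bits - SID_SHIFT_1T;

		printf("%d-bit VA: 256M proto=%d total=%d (< %d), 1T proto=%d total=%d (< %d)\n",
		       va_bits,
		       proto_256m, proto_256m + PRIME_BITS, 2 * proto_256m,
		       proto_1t, proto_1t + PRIME_BITS, 2 * proto_1t);
	}
	return 0;
}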

arch/powerpc/include/asm/mmu.h

Lines changed: 13 additions & 6 deletions
@@ -28,6 +28,10 @@
  * Individual features below.
  */
 
+/*
+ * Support for 68 bit VA space. We added that from ISA 2.05
+ */
+#define MMU_FTR_68_BIT_VA		ASM_CONST(0x00002000)
 /*
  * Kernel read only support.
  * We added the ppp value 0b110 in ISA 2.04.
@@ -109,10 +113,10 @@
 #define MMU_FTRS_POWER4		MMU_FTRS_DEFAULT_HPTE_ARCH_V2
 #define MMU_FTRS_PPC970		MMU_FTRS_POWER4 | MMU_FTR_TLBIE_CROP_VA
 #define MMU_FTRS_POWER5		MMU_FTRS_POWER4 | MMU_FTR_LOCKLESS_TLBIE
-#define MMU_FTRS_POWER6		MMU_FTRS_POWER4 | MMU_FTR_LOCKLESS_TLBIE | MMU_FTR_KERNEL_RO
-#define MMU_FTRS_POWER7		MMU_FTRS_POWER4 | MMU_FTR_LOCKLESS_TLBIE | MMU_FTR_KERNEL_RO
-#define MMU_FTRS_POWER8		MMU_FTRS_POWER4 | MMU_FTR_LOCKLESS_TLBIE | MMU_FTR_KERNEL_RO
-#define MMU_FTRS_POWER9		MMU_FTRS_POWER4 | MMU_FTR_LOCKLESS_TLBIE | MMU_FTR_KERNEL_RO
+#define MMU_FTRS_POWER6		MMU_FTRS_POWER5 | MMU_FTR_KERNEL_RO | MMU_FTR_68_BIT_VA
+#define MMU_FTRS_POWER7		MMU_FTRS_POWER6
+#define MMU_FTRS_POWER8		MMU_FTRS_POWER6
+#define MMU_FTRS_POWER9		MMU_FTRS_POWER6
 #define MMU_FTRS_CELL		MMU_FTRS_DEFAULT_HPTE_ARCH_V2 | \
 				MMU_FTR_CI_LARGE_PAGE
 #define MMU_FTRS_PA6T		MMU_FTRS_DEFAULT_HPTE_ARCH_V2 | \
@@ -136,7 +140,7 @@ enum {
 		MMU_FTR_NO_SLBIE_B | MMU_FTR_16M_PAGE | MMU_FTR_TLBIEL |
 		MMU_FTR_LOCKLESS_TLBIE | MMU_FTR_CI_LARGE_PAGE |
 		MMU_FTR_1T_SEGMENT | MMU_FTR_TLBIE_CROP_VA |
-		MMU_FTR_KERNEL_RO |
+		MMU_FTR_KERNEL_RO | MMU_FTR_68_BIT_VA |
 #ifdef CONFIG_PPC_RADIX_MMU
 		MMU_FTR_TYPE_RADIX |
 #endif
@@ -290,7 +294,10 @@ static inline bool early_radix_enabled(void)
 #define MMU_PAGE_16G	14
 #define MMU_PAGE_64G	15
 
-/* N.B. we need to change the type of hpte_page_sizes if this gets to be > 16 */
+/*
+ * N.B. we need to change the type of hpte_page_sizes if this gets to be > 16
+ * Also we need to change he type of mm_context.low/high_slices_psize.
+ */
 #define MMU_PAGE_COUNT	16
 
 #ifdef CONFIG_PPC_BOOK3S_64
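
The net effect of the MMU_FTRS_POWERn rework above is that POWER6 through POWER9 inherit MMU_FTR_68_BIT_VA while POWER4/5 and PPC970 do not, which is what steers the latter onto the 65-bit paths at runtime. A toy sketch of that feature test; only MMU_FTR_68_BIT_VA is a real value from the diff, the base masks and the two-argument mmu_has_feature() are stand-ins for illustration:

/* Sketch: the feature test that guards the 68-bit paths in this commit. */
#include <stdio.h>

#define MMU_FTR_68_BIT_VA	0x00002000UL	/* real constant from the diff */

#define MMU_FTRS_POWER5		0x00000001UL	/* stand-in value */
#define MMU_FTR_KERNEL_RO	0x00000002UL	/* stand-in value */
#define MMU_FTRS_POWER6		(MMU_FTRS_POWER5 | MMU_FTR_KERNEL_RO | MMU_FTR_68_BIT_VA)

/* simplified model of the kernel helper, which takes only the feature */
static int mmu_has_feature(unsigned long ftrs, unsigned long feature)
{
	return (ftrs & feature) != 0;
}

int main(void)
{
	printf("POWER5 68-bit VA: %d\n", mmu_has_feature(MMU_FTRS_POWER5, MMU_FTR_68_BIT_VA));
	printf("POWER6 68-bit VA: %d\n", mmu_has_feature(MMU_FTRS_POWER6, MMU_FTR_68_BIT_VA));
	return 0;
}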

arch/powerpc/kvm/book3s_64_mmu_host.c

Lines changed: 7 additions & 1 deletion
@@ -229,6 +229,7 @@ void kvmppc_mmu_unmap_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte)
 
 static struct kvmppc_sid_map *create_sid_map(struct kvm_vcpu *vcpu, u64 gvsid)
 {
+	unsigned long vsid_bits = VSID_BITS_65_256M;
 	struct kvmppc_sid_map *map;
 	struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu);
 	u16 sid_map_mask;
@@ -257,7 +258,12 @@ static struct kvmppc_sid_map *create_sid_map(struct kvm_vcpu *vcpu, u64 gvsid)
 		kvmppc_mmu_pte_flush(vcpu, 0, 0);
 		kvmppc_mmu_flush_segments(vcpu);
 	}
-	map->host_vsid = vsid_scramble(vcpu_book3s->proto_vsid_next++, 256M);
+
+	if (mmu_has_feature(MMU_FTR_68_BIT_VA))
+		vsid_bits = VSID_BITS_256M;
+
+	map->host_vsid = vsid_scramble(vcpu_book3s->proto_vsid_next++,
+				       VSID_MULTIPLIER_256M, vsid_bits);
 
 	map->guest_vsid = gvsid;
 	map->valid = true;

arch/powerpc/mm/mmu_context_book3s64.c

Lines changed: 8 additions & 1 deletion
@@ -59,7 +59,14 @@ static int alloc_context_id(int min_id, int max_id)
 
 int hash__alloc_context_id(void)
 {
-	return alloc_context_id(MIN_USER_CONTEXT, MAX_USER_CONTEXT);
+	unsigned long max;
+
+	if (mmu_has_feature(MMU_FTR_68_BIT_VA))
+		max = MAX_USER_CONTEXT;
+	else
+		max = MAX_USER_CONTEXT_65BIT_VA;
+
+	return alloc_context_id(MIN_USER_CONTEXT, max);
 }
 EXPORT_SYMBOL_GPL(hash__alloc_context_id);

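Plugging in the new constants: ESID_BITS = 68 - (28 + 19) = 21, so MAX_USER_CONTEXT_65BIT_VA = 2^(65 - 49) - 2 = 65534, i.e. the 16 bits of context mentioned in the commit message. A quick sketch; MAX_USER_CONTEXT is not part of this diff and is assumed here to keep its usual definition ((1 << CONTEXT_BITS) - 2):

/* Sketch: the two context-id ceilings hash__alloc_context_id() picks
 * between, computed with the constants from the mmu-hash.h diff. */
#include <stdio.h>

#define SID_SHIFT	28
#define CONTEXT_BITS	19
#define VA_BITS		68
#define ESID_BITS	(VA_BITS - (SID_SHIFT + CONTEXT_BITS))

#define MAX_USER_CONTEXT		((1UL << CONTEXT_BITS) - 2)	/* assumed definition */
#define MAX_USER_CONTEXT_65BIT_VA	((1UL << (65 - (SID_SHIFT + ESID_BITS))) - 2)

int main(void)
{
	printf("68-bit VA: up to %lu user contexts\n", MAX_USER_CONTEXT);
	printf("65-bit VA (p4/p5): up to %lu user contexts\n",
	       MAX_USER_CONTEXT_65BIT_VA);
	return 0;
}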
arch/powerpc/mm/slb_low.S

Lines changed: 45 additions & 9 deletions
@@ -23,6 +23,48 @@
 #include <asm/pgtable.h>
 #include <asm/firmware.h>
 
+/*
+ * This macro generates asm code to compute the VSID scramble
+ * function.  Used in slb_allocate() and do_stab_bolted.  The function
+ * computed is: (protovsid*VSID_MULTIPLIER) % VSID_MODULUS
+ *
+ *	rt = register containing the proto-VSID and into which the
+ *	     VSID will be stored
+ *	rx = scratch register (clobbered)
+ *	rf = flags
+ *
+ *	- rt and rx must be different registers
+ *	- The answer will end up in the low VSID_BITS bits of rt.  The higher
+ *	  bits may contain other garbage, so you may need to mask the
+ *	  result.
+ */
+#define ASM_VSID_SCRAMBLE(rt, rx, rf, size)				\
+	lis	rx,VSID_MULTIPLIER_##size@h;				\
+	ori	rx,rx,VSID_MULTIPLIER_##size@l;				\
+	mulld	rt,rt,rx;		/* rt = rt * MULTIPLIER */	\
+/*									\
+ * powermac get slb fault before feature fixup, so make 65 bit part	\
+ * the default part of feature fixup					\
+ */									\
+BEGIN_MMU_FTR_SECTION							\
+	srdi	rx,rt,VSID_BITS_65_##size;				\
+	clrldi	rt,rt,(64-VSID_BITS_65_##size);				\
+	add	rt,rt,rx;						\
+	addi	rx,rt,1;						\
+	srdi	rx,rx,VSID_BITS_65_##size;				\
+	add	rt,rt,rx;						\
+	rldimi	rf,rt,SLB_VSID_SHIFT_##size,(64 - (SLB_VSID_SHIFT_##size + VSID_BITS_65_##size)); \
+MMU_FTR_SECTION_ELSE							\
+	srdi	rx,rt,VSID_BITS_##size;					\
+	clrldi	rt,rt,(64-VSID_BITS_##size);				\
+	add	rt,rt,rx;		/* add high and low bits */	\
+	addi	rx,rt,1;						\
+	srdi	rx,rx,VSID_BITS_##size;	/* extract 2^VSID_BITS bit */	\
+	add	rt,rt,rx;						\
+	rldimi	rf,rt,SLB_VSID_SHIFT_##size,(64 - (SLB_VSID_SHIFT_##size + VSID_BITS_##size)); \
+ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_68_BIT_VA)
+
+
 /* void slb_allocate_realmode(unsigned long ea);
  *
  * Create an SLB entry for the given EA (user or kernel).
@@ -179,13 +221,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT)
  */
 .Lslb_finish_load:
 	rldimi	r10,r9,ESID_BITS,0
-	ASM_VSID_SCRAMBLE(r10,r9,256M)
-	/*
-	 * bits above VSID_BITS_256M need to be ignored from r10
-	 * also combine VSID and flags
-	 */
-	rldimi	r11,r10,SLB_VSID_SHIFT,(64 - (SLB_VSID_SHIFT + VSID_BITS_256M))
-
+	ASM_VSID_SCRAMBLE(r10,r9,r11,256M)
 	/* r3 = EA, r11 = VSID data */
 	/*
 	 * Find a slot, round robin. Previously we tried to find a
@@ -249,12 +285,12 @@ slb_compare_rr_to_size:
 .Lslb_finish_load_1T:
 	srdi	r10,r10,(SID_SHIFT_1T - SID_SHIFT)	/* get 1T ESID */
 	rldimi	r10,r9,ESID_BITS_1T,0
-	ASM_VSID_SCRAMBLE(r10,r9,1T)
+	ASM_VSID_SCRAMBLE(r10,r9,r11,1T)
 	/*
 	 * bits above VSID_BITS_1T need to be ignored from r10
 	 * also combine VSID and flags
 	 */
-	rldimi	r11,r10,SLB_VSID_SHIFT_1T,(64 - (SLB_VSID_SHIFT_1T + VSID_BITS_1T))
+
 	li	r10,MMU_SEGSIZE_1T
 	rldimi	r11,r10,SLB_VSID_SSIZE_SHIFT,0	/* insert segment size */
