39
39
40
40
/* Bits in the SLB VSID word */
#define SLB_VSID_SHIFT		12
/* 256M segments use the same VSID shift as the base definition */
#define SLB_VSID_SHIFT_256M	SLB_VSID_SHIFT
#define SLB_VSID_SHIFT_1T	24
#define SLB_VSID_SSIZE_SHIFT	62
#define SLB_VSID_B		ASM_CONST(0xc000000000000000)
@@ -521,9 +522,19 @@ extern void slb_set_size(u16 size);
521
522
* because of the modulo operation in vsid scramble.
522
523
*/
523
524
525
/*
 * Max VA bits we support as of now is 68 bits. We want 19 bit
 * context ID.
 * Restrictions:
 * GPU has restrictions of not being able to access beyond 128TB
 * (47 bit effective address). We also cannot do more than 20bit PID.
 * For p4 and p5, which can only do 65 bit VA, we restrict our CONTEXT_BITS
 * to 16 bits (ie, we can only have 2^16 pids at the same time).
 */
#define VA_BITS			68
#define CONTEXT_BITS		19
/* ESID bits are whatever VA is left after the segment offset and context ID */
#define ESID_BITS		(VA_BITS - (SID_SHIFT + CONTEXT_BITS))
#define ESID_BITS_1T		(VA_BITS - (SID_SHIFT_1T + CONTEXT_BITS))

#define ESID_BITS_MASK		((1 << ESID_BITS) - 1)
#define ESID_BITS_1T_MASK	((1 << ESID_BITS_1T) - 1)
@@ -533,7 +544,7 @@ extern void slb_set_size(u16 size);
/*
 * The proto-VSID space has 2^(CONTEXT_BITS + ESID_BITS) - 1 segments
 * available for user + kernel mapping. VSID 0 is reserved as invalid, contexts
 * 1-4 are used for kernel mapping. Each segment contains 2^28 bytes. Each
 * context maps 2^49 bytes (512TB).
 *
 * We also need to avoid the last segment of the last context, because that
 * would give a protovsid of 0x1fffffffff. That will result in a VSID 0
@@ -545,54 +556,46 @@ extern void slb_set_size(u16 size);
545
556
/* Would be nice to use KERNEL_REGION_ID here */
#define KERNEL_REGION_CONTEXT_OFFSET	(0xc - 1)

/*
 * For platforms that only support a 65-bit VA we limit the context bits.
 * NOTE(review): the "- 2" presumably excludes reserved VSID 0 and the last
 * context (see the proto-VSID space comment above) -- confirm.
 */
#define MAX_USER_CONTEXT_65BIT_VA ((ASM_CONST(1) << (65 - (SID_SHIFT + ESID_BITS))) - 2)
563
+
548
564
/*
 * This should be computed such that protovsid * vsid_multiplier
 * doesn't overflow 64 bits. The vsid_multiplier should also be
 * co-prime to vsid_modulus. We also need to make sure that the number
 * of bits in the multiplied result (dividend) is less than twice the
 * number of protovsid bits for our modulus optimization to work.
 *
 * The below table shows the current values used.
 * |-------+------------+----------------------+------------+--------------------|
 * |       | Prime Bits | proto VSID_BITS_65VA | Total Bits | 2* proto VSID_BITS |
 * |-------+------------+----------------------+------------+--------------------|
 * | 1T    |         24 |                   25 |         49 |                 50 |
 * |-------+------------+----------------------+------------+--------------------|
 * | 256MB |         24 |                   37 |         61 |                 74 |
 * |-------+------------+----------------------+------------+--------------------|
 *
 * |-------+------------+----------------------+------------+--------------------|
 * |       | Prime Bits | proto VSID_BITS_68VA | Total Bits | 2* proto VSID_BITS |
 * |-------+------------+----------------------+------------+--------------------|
 * | 1T    |         24 |                   28 |         52 |                 56 |
 * |-------+------------+----------------------+------------+--------------------|
 * | 256MB |         24 |                   40 |         64 |                 80 |
 * |-------+------------+----------------------+------------+--------------------|
 *
 */
#define VSID_MULTIPLIER_256M	ASM_CONST(12538073)	/* 24-bit prime */
#define VSID_BITS_256M		(VA_BITS - SID_SHIFT)
#define VSID_BITS_65_256M	(65 - SID_SHIFT)

#define VSID_MULTIPLIER_1T	ASM_CONST(12538073)	/* 24-bit prime */
#define VSID_BITS_1T		(VA_BITS - SID_SHIFT_1T)
#define VSID_BITS_65_1T		(65 - SID_SHIFT_1T)

#define USER_VSID_RANGE		(1UL << (ESID_BITS + SID_SHIFT))
562
598
563
/*
 * This macro generates asm code to compute the VSID scramble
 * function.  Used in slb_allocate() and do_stab_bolted.  The function
 * computed is: (protovsid*VSID_MULTIPLIER) % VSID_MODULUS
 *
 *	rt = register containing the proto-VSID and into which the
 *		VSID will be stored
 *	rx = scratch register (clobbered)
 *
 *	- rt and rx must be different registers
 *	- The answer will end up in the low VSID_BITS bits of rt.  The higher
 *	  bits may contain other garbage, so you may need to mask the
 *	  result.
 */
#define ASM_VSID_SCRAMBLE(rt, rx, size)					\
	lis	rx,VSID_MULTIPLIER_##size@h;				\
	ori	rx,rx,VSID_MULTIPLIER_##size@l;				\
	mulld	rt,rt,rx;		/* rt = rt * MULTIPLIER */	\
									\
	srdi	rx,rt,VSID_BITS_##size;					\
	clrldi	rt,rt,(64-VSID_BITS_##size);				\
	add	rt,rt,rx;		/* add high and low bits */	\
	/* NOTE: explanation based on VSID_BITS_##size = 36		\
	 * Now, r3 == VSID (mod 2^36-1), and lies between 0 and		\
	 * 2^36-1+2^28-1.  That in particular means that if r3 >=	\
	 * 2^36-1, then r3+1 has the 2^36 bit set.  So, if r3+1 has	\
	 * the bit clear, r3 already has the answer we want, if it	\
	 * doesn't, the answer is the low 36 bits of r3+1.  So in all	\
	 * cases the answer is the low 36 bits of (r3 + ((r3+1) >> 36))*/ \
	addi	rx,rt,1;						\
	srdi	rx,rx,VSID_BITS_##size;	/* extract 2^VSID_BITS bit */	\
	add	rt,rt,rx
595
-
596
599
/*
 * 4 bits per slice and we have one slice per 1TB:
 * one slice per 2^40 bytes at half a byte each => range >> 41.
 */
#define SLICE_ARRAY_SIZE	(H_PGTABLE_RANGE >> 41)
598
601
@@ -640,14 +643,29 @@ static inline void subpage_prot_init_new_context(struct mm_struct *mm) { }
640
643
#define vsid_scramble (protovsid , size ) \
641
644
((((protovsid) * VSID_MULTIPLIER_##size) % VSID_MODULUS_##size))
642
645
643
- #else /* 1 */
646
+ /* simplified form avoiding mod operation */
644
647
#define vsid_scramble (protovsid , size ) \
645
648
({ \
646
649
unsigned long x; \
647
650
x = (protovsid) * VSID_MULTIPLIER_##size; \
648
651
x = (x >> VSID_BITS_##size) + (x & VSID_MODULUS_##size); \
649
652
(x + ((x+1) >> VSID_BITS_##size)) & VSID_MODULUS_##size; \
650
653
})
654
+
655
+ #else /* 1 */
656
/*
 * Scramble a proto-VSID into a VSID.
 *
 * Computes (protovsid * vsid_multiplier) % (2^vsid_bits - 1) without a
 * divide: fold the high bits of the product onto the low bits (valid
 * because 2^vsid_bits == 1 (mod 2^vsid_bits - 1)), then apply a final
 * carry correction so the result lands in [0, 2^vsid_bits - 2].
 *
 * We have the same multiplier for both 256M and 1T segments now.
 */
static inline unsigned long vsid_scramble(unsigned long protovsid,
					  unsigned long vsid_multiplier, int vsid_bits)
{
	unsigned long vsid;
	unsigned long vsid_modulus = ((1UL << vsid_bits) - 1);

	vsid = protovsid * vsid_multiplier;
	/* Fold: x mod (2^b - 1) == ((x >> b) + (x & (2^b - 1))) mod (2^b - 1) */
	vsid = (vsid >> vsid_bits) + (vsid & vsid_modulus);
	/* The fold may leave a value >= modulus; fold the final carry back in. */
	return (vsid + ((vsid + 1) >> vsid_bits)) & vsid_modulus;
}
668
+
651
669
#endif /* 1 */
652
670
653
671
/* Returns the segment size indicator for a user address */
@@ -662,17 +680,30 @@ static inline int user_segment_size(unsigned long addr)
662
680
static inline unsigned long get_vsid (unsigned long context , unsigned long ea ,
663
681
int ssize )
664
682
{
683
+ unsigned long va_bits = VA_BITS ;
684
+ unsigned long vsid_bits ;
685
+ unsigned long protovsid ;
686
+
665
687
/*
666
688
* Bad address. We return VSID 0 for that
667
689
*/
668
690
if ((ea & ~REGION_MASK ) >= H_PGTABLE_RANGE )
669
691
return 0 ;
670
692
671
- if (ssize == MMU_SEGSIZE_256M )
672
- return vsid_scramble ((context << ESID_BITS )
673
- | ((ea >> SID_SHIFT ) & ESID_BITS_MASK ), 256 M );
674
- return vsid_scramble ((context << ESID_BITS_1T )
675
- | ((ea >> SID_SHIFT_1T ) & ESID_BITS_1T_MASK ), 1 T );
693
+ if (!mmu_has_feature (MMU_FTR_68_BIT_VA ))
694
+ va_bits = 65 ;
695
+
696
+ if (ssize == MMU_SEGSIZE_256M ) {
697
+ vsid_bits = va_bits - SID_SHIFT ;
698
+ protovsid = (context << ESID_BITS ) |
699
+ ((ea >> SID_SHIFT ) & ESID_BITS_MASK );
700
+ return vsid_scramble (protovsid , VSID_MULTIPLIER_256M , vsid_bits );
701
+ }
702
+ /* 1T segment */
703
+ vsid_bits = va_bits - SID_SHIFT_1T ;
704
+ protovsid = (context << ESID_BITS_1T ) |
705
+ ((ea >> SID_SHIFT_1T ) & ESID_BITS_1T_MASK );
706
+ return vsid_scramble (protovsid , VSID_MULTIPLIER_1T , vsid_bits );
676
707
}
677
708
678
709
/*
0 commit comments