
Commit c40785a

ozbenh authored and mpe committed
powerpc/dart: Use a cachable DART
Instead of punching a hole in the linear mapping, just use normal cachable memory, and apply the flush sequence documented in the CPC925 (aka U3) user manual.

This allows us to remove quite a bit of code related to the early allocation of the DART and the hole in the linear mapping. We can also get rid of the copy of the DART used for suspend/resume, as the original memory can just be saved/restored now, as long as we properly sync the caches.

Signed-off-by: Benjamin Herrenschmidt <[email protected]>
[mpe: Integrate dart_init() fix to return ENODEV when DART disabled]
Signed-off-by: Michael Ellerman <[email protected]>
1 parent de4cf3d commit c40785a
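
For reference, the core of the change is the dart_cache_sync() helper added in arch/powerpc/sysdev/dart_iommu.c below: DART entries are now written through the normal cacheable mapping and then pushed out to memory with the CPC925 (U3) flush sequence. A minimal annotated sketch, condensed from that diff (flush_inval_dcache_range() and the 4-byte entry size come from the patch itself; the comments are editorial):

/* Push "count" DART entries starting at "base" out past the CPU caches,
 * so the cache-incoherent DART engine sees them. One extra entry is
 * flushed because the memory controller may prefetch (per a Darwin note
 * quoted in the patch). */
static void dart_cache_sync(unsigned int *base, unsigned int count)
{
	unsigned long start = (unsigned long)base;
	unsigned long end = start + (count + 1) * sizeof(unsigned int);
	unsigned int tmp;

	/* Standard dcache flush/invalidate over the updated range */
	flush_inval_dcache_range(start, end);

	/* CPC925 manual sequence: order prior stores, flush the trailing
	 * cache line, then load it back so the DART sees the update. */
	asm volatile(" sync;"
		     " isync;"
		     " dcbf 0,%1;"
		     " sync;"
		     " isync;"
		     " lwz %0,0(%1);"
		     " isync" : "=r" (tmp) : "r" (end) : "memory");
}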

5 files changed: +88 -144 lines changed


arch/powerpc/include/asm/iommu.h

Lines changed: 0 additions & 1 deletion
@@ -273,7 +273,6 @@ extern void iommu_init_early_pSeries(void);
 extern void iommu_init_early_dart(struct pci_controller_ops *controller_ops);
 extern void iommu_init_early_pasemi(void);
 
-extern void alloc_dart_table(void);
 #if defined(CONFIG_PPC64) && defined(CONFIG_PM)
 static inline void iommu_save(void)
 {

arch/powerpc/mm/hash_utils_64.c

Lines changed: 0 additions & 32 deletions
@@ -87,10 +87,6 @@
  *
  */
 
-#ifdef CONFIG_U3_DART
-extern unsigned long dart_tablebase;
-#endif /* CONFIG_U3_DART */
-
 static unsigned long _SDR1;
 struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT];
 EXPORT_SYMBOL_GPL(mmu_psize_defs);
@@ -846,34 +842,6 @@ static void __init htab_initialize(void)
 		DBG("creating mapping for region: %lx..%lx (prot: %lx)\n",
 		    base, size, prot);
 
-#ifdef CONFIG_U3_DART
-		/* Do not map the DART space. Fortunately, it will be aligned
-		 * in such a way that it will not cross two memblock regions and
-		 * will fit within a single 16Mb page.
-		 * The DART space is assumed to be a full 16Mb region even if
-		 * we only use 2Mb of that space. We will use more of it later
-		 * for AGP GART. We have to use a full 16Mb large page.
-		 */
-		DBG("DART base: %lx\n", dart_tablebase);
-
-		if (dart_tablebase != 0 && dart_tablebase >= base
-		    && dart_tablebase < (base + size)) {
-			unsigned long dart_table_end = dart_tablebase + 16 * MB;
-			if (base != dart_tablebase)
-				BUG_ON(htab_bolt_mapping(base, dart_tablebase,
-							 __pa(base), prot,
-							 mmu_linear_psize,
-							 mmu_kernel_ssize));
-			if ((base + size) > dart_table_end)
-				BUG_ON(htab_bolt_mapping(dart_tablebase+16*MB,
-							 base + size,
-							 __pa(dart_table_end),
-							 prot,
-							 mmu_linear_psize,
-							 mmu_kernel_ssize));
-			continue;
-		}
-#endif /* CONFIG_U3_DART */
 		BUG_ON(htab_bolt_mapping(base, base + size, __pa(base),
 			 prot, mmu_linear_psize, mmu_kernel_ssize));
 	}

arch/powerpc/platforms/maple/setup.c

Lines changed: 0 additions & 7 deletions
@@ -305,13 +305,6 @@ static int __init maple_probe(void)
 	if (!of_flat_dt_is_compatible(root, "Momentum,Maple") &&
 	    !of_flat_dt_is_compatible(root, "Momentum,Apache"))
 		return 0;
-	/*
-	 * On U3, the DART (iommu) must be allocated now since it
-	 * has an impact on htab_initialize (due to the large page it
-	 * occupies having to be broken up so the DART itself is not
-	 * part of the cacheable linar mapping
-	 */
-	alloc_dart_table();
 
 	hpte_init_native();
 	pm_power_off = maple_power_off;

arch/powerpc/platforms/powermac/setup.c

Lines changed: 0 additions & 8 deletions
@@ -607,14 +607,6 @@ static int __init pmac_probe(void)
 		return 0;
 
 #ifdef CONFIG_PPC64
-	/*
-	 * On U3, the DART (iommu) must be allocated now since it
-	 * has an impact on htab_initialize (due to the large page it
-	 * occupies having to be broken up so the DART itself is not
-	 * part of the cacheable linar mapping
-	 */
-	alloc_dart_table();
-
 	hpte_init_native();
 #endif
 

arch/powerpc/sysdev/dart_iommu.c

Lines changed: 88 additions & 96 deletions
@@ -48,16 +48,10 @@
 
 #include "dart.h"
 
-/* Physical base address and size of the DART table */
-unsigned long dart_tablebase; /* exported to htab_initialize */
+/* DART table address and size */
+static u32 *dart_tablebase;
 static unsigned long dart_tablesize;
 
-/* Virtual base address of the DART table */
-static u32 *dart_vbase;
-#ifdef CONFIG_PM
-static u32 *dart_copy;
-#endif
-
 /* Mapped base address for the dart */
 static unsigned int __iomem *dart;
 
@@ -151,6 +145,34 @@ static inline void dart_tlb_invalidate_one(unsigned long bus_rpn)
 	spin_unlock_irqrestore(&invalidate_lock, flags);
 }
 
+static void dart_cache_sync(unsigned int *base, unsigned int count)
+{
+	/*
+	 * We add 1 to the number of entries to flush, following a
+	 * comment in Darwin indicating that the memory controller
+	 * can prefetch unmapped memory under some circumstances.
+	 */
+	unsigned long start = (unsigned long)base;
+	unsigned long end = start + (count + 1) * sizeof(unsigned int);
+	unsigned int tmp;
+
+	/* Perform a standard cache flush */
+	flush_inval_dcache_range(start, end);
+
+	/*
+	 * Perform the sequence described in the CPC925 manual to
+	 * ensure all the data gets to a point the cache incoherent
+	 * DART hardware will see.
+	 */
+	asm volatile(" sync;"
+		     " isync;"
+		     " dcbf 0,%1;"
+		     " sync;"
+		     " isync;"
+		     " lwz %0,0(%1);"
+		     " isync" : "=r" (tmp) : "r" (end) : "memory");
+}
+
 static void dart_flush(struct iommu_table *tbl)
 {
 	mb();
@@ -165,13 +187,13 @@ static int dart_build(struct iommu_table *tbl, long index,
 		      enum dma_data_direction direction,
 		      struct dma_attrs *attrs)
 {
-	unsigned int *dp;
+	unsigned int *dp, *orig_dp;
 	unsigned int rpn;
 	long l;
 
 	DBG("dart: build at: %lx, %lx, addr: %x\n", index, npages, uaddr);
 
-	dp = ((unsigned int*)tbl->it_base) + index;
+	orig_dp = dp = ((unsigned int*)tbl->it_base) + index;
 
 	/* On U3, all memory is contiguous, so we can move this
 	 * out of the loop.
@@ -184,11 +206,7 @@ static int dart_build(struct iommu_table *tbl, long index,
 
 		uaddr += DART_PAGE_SIZE;
 	}
-
-	/* make sure all updates have reached memory */
-	mb();
-	in_be32((unsigned __iomem *)dp);
-	mb();
+	dart_cache_sync(orig_dp, npages);
 
 	if (dart_is_u4) {
 		rpn = index;
@@ -203,7 +221,8 @@ static int dart_build(struct iommu_table *tbl, long index,
 
 static void dart_free(struct iommu_table *tbl, long index, long npages)
 {
-	unsigned int *dp;
+	unsigned int *dp, *orig_dp;
+	long orig_npages = npages;
 
 	/* We don't worry about flushing the TLB cache. The only drawback of
 	 * not doing it is that we won't catch buggy device drivers doing
@@ -212,34 +231,30 @@ static void dart_free(struct iommu_table *tbl, long index, long npages)
 
 	DBG("dart: free at: %lx, %lx\n", index, npages);
 
-	dp = ((unsigned int *)tbl->it_base) + index;
+	orig_dp = dp = ((unsigned int *)tbl->it_base) + index;
 
 	while (npages--)
 		*(dp++) = dart_emptyval;
-}
 
+	dart_cache_sync(orig_dp, orig_npages);
+}
 
-static int __init dart_init(struct device_node *dart_node)
+static void allocate_dart(void)
 {
-	unsigned int i;
-	unsigned long tmp, base, size;
-	struct resource r;
-
-	if (dart_tablebase == 0 || dart_tablesize == 0) {
-		printk(KERN_INFO "DART: table not allocated, using "
-		       "direct DMA\n");
-		return -ENODEV;
-	}
+	unsigned long tmp;
 
-	if (of_address_to_resource(dart_node, 0, &r))
-		panic("DART: can't get register base ! ");
+	/* 512 pages (2MB) is max DART tablesize. */
+	dart_tablesize = 1UL << 21;
 
-	/* Make sure nothing from the DART range remains in the CPU cache
-	 * from a previous mapping that existed before the kernel took
-	 * over
+	/*
+	 * 16MB (1 << 24) alignment. We allocate a full 16Mb chuck since we
+	 * will blow up an entire large page anyway in the kernel mapping.
 	 */
-	flush_dcache_phys_range(dart_tablebase,
-				dart_tablebase + dart_tablesize);
+	dart_tablebase = __va(memblock_alloc_base(1UL<<24,
+						  1UL<<24, 0x80000000L));
+
+	/* There is no point scanning the DART space for leaks*/
+	kmemleak_no_scan((void *)dart_tablebase);
 
 	/* Allocate a spare page to map all invalid DART pages. We need to do
 	 * that to work around what looks like a problem with the HT bridge
@@ -249,20 +264,51 @@ static int __init dart_init(struct device_node *dart_node)
 	dart_emptyval = DARTMAP_VALID | ((tmp >> DART_PAGE_SHIFT) &
 					 DARTMAP_RPNMASK);
 
+	printk(KERN_INFO "DART table allocated at: %p\n", dart_tablebase);
+}
+
+static int __init dart_init(struct device_node *dart_node)
+{
+	unsigned int i;
+	unsigned long base, size;
+	struct resource r;
+
+	/* IOMMU disabled by the user ? bail out */
+	if (iommu_is_off)
+		return -ENODEV;
+
+	/*
+	 * Only use the DART if the machine has more than 1GB of RAM
+	 * or if requested with iommu=on on cmdline.
+	 *
+	 * 1GB of RAM is picked as limit because some default devices
+	 * (i.e. Airport Extreme) have 30 bit address range limits.
+	 */
+
+	if (!iommu_force_on && memblock_end_of_DRAM() <= 0x40000000ull)
+		return -ENODEV;
+
+	/* Get DART registers */
+	if (of_address_to_resource(dart_node, 0, &r))
+		panic("DART: can't get register base ! ");
+
 	/* Map in DART registers */
 	dart = ioremap(r.start, resource_size(&r));
 	if (dart == NULL)
 		panic("DART: Cannot map registers!");
 
-	/* Map in DART table */
-	dart_vbase = ioremap(__pa(dart_tablebase), dart_tablesize);
+	/* Allocate the DART and dummy page */
+	allocate_dart();
 
 	/* Fill initial table */
 	for (i = 0; i < dart_tablesize/4; i++)
-		dart_vbase[i] = dart_emptyval;
+		dart_tablebase[i] = dart_emptyval;
+
+	/* Push to memory */
+	dart_cache_sync(dart_tablebase, dart_tablesize / sizeof(u32));
 
 	/* Initialize DART with table base and enable it. */
-	base = dart_tablebase >> DART_PAGE_SHIFT;
+	base = ((unsigned long)dart_tablebase) >> DART_PAGE_SHIFT;
 	size = dart_tablesize >> DART_PAGE_SHIFT;
 	if (dart_is_u4) {
 		size &= DART_SIZE_U4_SIZE_MASK;
@@ -301,7 +347,7 @@ static void iommu_table_dart_setup(void)
 	iommu_table_dart.it_page_shift = IOMMU_PAGE_SHIFT_4K;
 
 	/* Initialize the common IOMMU code */
-	iommu_table_dart.it_base = (unsigned long)dart_vbase;
+	iommu_table_dart.it_base = (unsigned long)dart_tablebase;
 	iommu_table_dart.it_index = 0;
 	iommu_table_dart.it_blocksize = 1;
 	iommu_table_dart.it_ops = &iommu_dart_ops;
@@ -404,75 +450,21 @@ void __init iommu_init_early_dart(struct pci_controller_ops *controller_ops)
 }
 
 #ifdef CONFIG_PM
-static void iommu_dart_save(void)
-{
-	memcpy(dart_copy, dart_vbase, 2*1024*1024);
-}
-
 static void iommu_dart_restore(void)
 {
-	memcpy(dart_vbase, dart_copy, 2*1024*1024);
+	dart_cache_sync(dart_tablebase, dart_tablesize / sizeof(u32));
 	dart_tlb_invalidate_all();
 }
 
 static int __init iommu_init_late_dart(void)
 {
-	unsigned long tbasepfn;
-	struct page *p;
-
-	/* if no dart table exists then we won't need to save it
-	 * and the area has also not been reserved */
 	if (!dart_tablebase)
 		return 0;
 
-	tbasepfn = __pa(dart_tablebase) >> PAGE_SHIFT;
-	register_nosave_region_late(tbasepfn,
-				    tbasepfn + ((1<<24) >> PAGE_SHIFT));
-
-	/* For suspend we need to copy the dart contents because
-	 * it is not part of the regular mapping (see above) and
-	 * thus not saved automatically. The memory for this copy
-	 * must be allocated early because we need 2 MB. */
-	p = alloc_pages(GFP_KERNEL, 21 - PAGE_SHIFT);
-	BUG_ON(!p);
-	dart_copy = page_address(p);
-
-	ppc_md.iommu_save = iommu_dart_save;
 	ppc_md.iommu_restore = iommu_dart_restore;
 
 	return 0;
 }
 
 late_initcall(iommu_init_late_dart);
-#endif
-
-void __init alloc_dart_table(void)
-{
-	/* Only reserve DART space if machine has more than 1GB of RAM
-	 * or if requested with iommu=on on cmdline.
-	 *
-	 * 1GB of RAM is picked as limit because some default devices
-	 * (i.e. Airport Extreme) have 30 bit address range limits.
-	 */
-
-	if (iommu_is_off)
-		return;
-
-	if (!iommu_force_on && memblock_end_of_DRAM() <= 0x40000000ull)
-		return;
-
-	/* 512 pages (2MB) is max DART tablesize. */
-	dart_tablesize = 1UL << 21;
-	/* 16MB (1 << 24) alignment. We allocate a full 16Mb chuck since we
-	 * will blow up an entire large page anyway in the kernel mapping
-	 */
-	dart_tablebase = (unsigned long)
-		__va(memblock_alloc_base(1UL<<24, 1UL<<24, 0x80000000L));
-	/*
-	 * The DART space is later unmapped from the kernel linear mapping and
-	 * accessing dart_tablebase during kmemleak scanning will fault.
-	 */
-	kmemleak_no_scan((void *)dart_tablebase);
-
-	printk(KERN_INFO "DART table allocated at: %lx\n", dart_tablebase);
-}
+#endif /* CONFIG_PM */
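
As a rough sizing check (an inference from the constants in allocate_dart() and iommu_table_dart_setup() above, not something stated in the commit): the 2MB table holds 4-byte entries, each mapping a 4KB DART page, so it can cover up to 2GB of bus address space, and it is allocated 16MB-aligned below 0x80000000 so it stays inside a single large page of the linear mapping. A small illustrative sketch; these macro names are hypothetical and do not exist in the kernel source:

/* Illustrative sizing only, derived from allocate_dart() above. */
#define DART_TABLE_BYTES	(1UL << 21)			/* 2MB max table ("512 pages")     */
#define DART_ENTRY_BYTES	sizeof(u32)			/* one 32-bit entry per DART page  */
#define DART_ENTRIES		(DART_TABLE_BYTES / DART_ENTRY_BYTES)	/* 512K entries          */
#define DART_IO_PAGE_BYTES	(1UL << 12)			/* 4KB DART/IOMMU page             */
#define DART_MAX_DMA_BYTES	(DART_ENTRIES * DART_IO_PAGE_BYTES)	/* 2GB of bus address space */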
