
Commit 1c8242c

drm/i915: Use unchecked writes for setting up the fences
As the fence registers are not part of the engine powerwells, we do
not need to fiddle with forcewake in order to update a fence. Avoid
the heavyweight debug checking applied to normal mmio writes, as the
checking dominates the selftest runtime and is superfluous! In the
process, retire the implicit I915_WRITE() macro in favour of the new
intel_uncore_write interface.

v2: s/unc/uncore/

Signed-off-by: Chris Wilson <[email protected]>
Reviewed-by: Joonas Lahtinen <[email protected]>
Link: https://patchwork.freedesktop.org/patch/msgid/[email protected]
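For context, a minimal sketch of the conversion pattern applied throughout
the file (illustrative only; the helper name and its parameters are
placeholders, not code from the patch):

/*
 * Illustrative sketch, not part of the patch. I915_WRITE()/POSTING_READ()
 * resolved an implicit dev_priv and took the checked mmio path, acquiring
 * forcewake and (in debug builds) validating every access. The _fw
 * variants below are raw, unchecked accessors; they are safe for the
 * fence registers, which live outside the engine powerwells and so need
 * no forcewake.
 */
static void sketch_write_fence(struct intel_uncore *uncore,
			       i915_reg_t reg, u32 val)
{
	intel_uncore_write_fw(uncore, reg, val);
	intel_uncore_posting_read_fw(uncore, reg); /* flush the write */
}

The same substitution on the read side (I915_READ becoming
intel_uncore_read) handles the swizzle detection further down the diff.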
Parent: 87d1372

1 file changed, 68 insertions(+), 55 deletions(-)

drivers/gpu/drm/i915/i915_gem_fence_reg.c

@@ -94,9 +94,10 @@ static void i965_write_fence_reg(struct drm_i915_fence_reg *fence,
 	}
 
 	if (!pipelined) {
-		struct drm_i915_private *dev_priv = fence->i915;
+		struct intel_uncore *uncore = &fence->i915->uncore;
 
-		/* To w/a incoherency with non-atomic 64-bit register updates,
+		/*
+		 * To w/a incoherency with non-atomic 64-bit register updates,
 		 * we split the 64-bit update into two 32-bit writes. In order
 		 * for a partial fence not to be evaluated between writes, we
 		 * precede the update with write to turn off the fence register,
@@ -105,12 +106,12 @@ static void i965_write_fence_reg(struct drm_i915_fence_reg *fence,
 		 * For extra levels of paranoia, we make sure each step lands
 		 * before applying the next step.
 		 */
-		I915_WRITE(fence_reg_lo, 0);
-		POSTING_READ(fence_reg_lo);
+		intel_uncore_write_fw(uncore, fence_reg_lo, 0);
+		intel_uncore_posting_read_fw(uncore, fence_reg_lo);
 
-		I915_WRITE(fence_reg_hi, upper_32_bits(val));
-		I915_WRITE(fence_reg_lo, lower_32_bits(val));
-		POSTING_READ(fence_reg_lo);
+		intel_uncore_write_fw(uncore, fence_reg_hi, upper_32_bits(val));
+		intel_uncore_write_fw(uncore, fence_reg_lo, lower_32_bits(val));
+		intel_uncore_posting_read_fw(uncore, fence_reg_lo);
 	}
 }
 
@@ -146,11 +147,11 @@ static void i915_write_fence_reg(struct drm_i915_fence_reg *fence,
 	}
 
 	if (!pipelined) {
-		struct drm_i915_private *dev_priv = fence->i915;
+		struct intel_uncore *uncore = &fence->i915->uncore;
 		i915_reg_t reg = FENCE_REG(fence->id);
 
-		I915_WRITE(reg, val);
-		POSTING_READ(reg);
+		intel_uncore_write_fw(uncore, reg, val);
+		intel_uncore_posting_read_fw(uncore, reg);
 	}
 }
 
@@ -178,18 +179,19 @@ static void i830_write_fence_reg(struct drm_i915_fence_reg *fence,
 	}
 
 	if (!pipelined) {
-		struct drm_i915_private *dev_priv = fence->i915;
+		struct intel_uncore *uncore = &fence->i915->uncore;
 		i915_reg_t reg = FENCE_REG(fence->id);
 
-		I915_WRITE(reg, val);
-		POSTING_READ(reg);
+		intel_uncore_write_fw(uncore, reg, val);
+		intel_uncore_posting_read_fw(uncore, reg);
 	}
 }
 
 static void fence_write(struct drm_i915_fence_reg *fence,
 			struct i915_vma *vma)
 {
-	/* Previous access through the fence register is marshalled by
+	/*
+	 * Previous access through the fence register is marshalled by
 	 * the mb() inside the fault handlers (i915_gem_release_mmaps)
 	 * and explicitly managed for internal users.
 	 */
@@ -201,7 +203,8 @@ static void fence_write(struct drm_i915_fence_reg *fence,
 	else
 		i965_write_fence_reg(fence, vma);
 
-	/* Access through the fenced region afterwards is
+	/*
+	 * Access through the fenced region afterwards is
 	 * ordered by the posting reads whilst writing the registers.
 	 */
 
@@ -308,11 +311,11 @@ int i915_vma_put_fence(struct i915_vma *vma)
 	return fence_update(fence, NULL);
 }
 
-static struct drm_i915_fence_reg *fence_find(struct drm_i915_private *dev_priv)
+static struct drm_i915_fence_reg *fence_find(struct drm_i915_private *i915)
 {
 	struct drm_i915_fence_reg *fence;
 
-	list_for_each_entry(fence, &dev_priv->mm.fence_list, link) {
+	list_for_each_entry(fence, &i915->mm.fence_list, link) {
 		GEM_BUG_ON(fence->vma && fence->vma->fence != fence);
 
 		if (fence->pin_count)
@@ -322,7 +325,7 @@ static struct drm_i915_fence_reg *fence_find(struct drm_i915_private *dev_priv)
 	}
 
 	/* Wait for completion of pending flips which consume fences */
-	if (intel_has_pending_fb_unpin(dev_priv))
+	if (intel_has_pending_fb_unpin(i915))
 		return ERR_PTR(-EAGAIN);
 
 	return ERR_PTR(-EDEADLK);
@@ -353,7 +356,8 @@ i915_vma_pin_fence(struct i915_vma *vma)
 	struct i915_vma *set = i915_gem_object_is_tiled(vma->obj) ? vma : NULL;
 	int err;
 
-	/* Note that we revoke fences on runtime suspend. Therefore the user
+	/*
+	 * Note that we revoke fences on runtime suspend. Therefore the user
 	 * must keep the device awake whilst using the fence.
 	 */
 	assert_rpm_wakelock_held(vma->vm->i915);
@@ -395,28 +399,28 @@ i915_vma_pin_fence(struct i915_vma *vma)
 
 /**
  * i915_reserve_fence - Reserve a fence for vGPU
- * @dev_priv: i915 device private
+ * @i915: i915 device private
  *
  * This function walks the fence regs looking for a free one and remove
  * it from the fence_list. It is used to reserve fence for vGPU to use.
  */
 struct drm_i915_fence_reg *
-i915_reserve_fence(struct drm_i915_private *dev_priv)
+i915_reserve_fence(struct drm_i915_private *i915)
 {
 	struct drm_i915_fence_reg *fence;
 	int count;
 	int ret;
 
-	lockdep_assert_held(&dev_priv->drm.struct_mutex);
+	lockdep_assert_held(&i915->drm.struct_mutex);
 
 	/* Keep at least one fence available for the display engine. */
 	count = 0;
-	list_for_each_entry(fence, &dev_priv->mm.fence_list, link)
+	list_for_each_entry(fence, &i915->mm.fence_list, link)
 		count += !fence->pin_count;
 	if (count <= 1)
 		return ERR_PTR(-ENOSPC);
 
-	fence = fence_find(dev_priv);
+	fence = fence_find(i915);
 	if (IS_ERR(fence))
 		return fence;
 
@@ -446,19 +450,19 @@ void i915_unreserve_fence(struct drm_i915_fence_reg *fence)
 
 /**
  * i915_gem_restore_fences - restore fence state
- * @dev_priv: i915 device private
+ * @i915: i915 device private
  *
 * Restore the hw fence state to match the software tracking again, to be called
 * after a gpu reset and on resume. Note that on runtime suspend we only cancel
 * the fences, to be reacquired by the user later.
  */
-void i915_gem_restore_fences(struct drm_i915_private *dev_priv)
+void i915_gem_restore_fences(struct drm_i915_private *i915)
 {
 	int i;
 
 	rcu_read_lock(); /* keep obj alive as we dereference */
-	for (i = 0; i < dev_priv->num_fence_regs; i++) {
-		struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i];
+	for (i = 0; i < i915->num_fence_regs; i++) {
+		struct drm_i915_fence_reg *reg = &i915->fence_regs[i];
 		struct i915_vma *vma = READ_ONCE(reg->vma);
 
 		GEM_BUG_ON(vma && vma->fence != reg);
@@ -525,18 +529,19 @@ void i915_gem_restore_fences(struct drm_i915_private *dev_priv)
 
 /**
  * i915_gem_detect_bit_6_swizzle - detect bit 6 swizzling pattern
- * @dev_priv: i915 device private
+ * @i915: i915 device private
  *
  * Detects bit 6 swizzling of address lookup between IGD access and CPU
  * access through main memory.
  */
 void
-i915_gem_detect_bit_6_swizzle(struct drm_i915_private *dev_priv)
+i915_gem_detect_bit_6_swizzle(struct drm_i915_private *i915)
 {
+	struct intel_uncore *uncore = &i915->uncore;
 	u32 swizzle_x = I915_BIT_6_SWIZZLE_UNKNOWN;
 	u32 swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN;
 
-	if (INTEL_GEN(dev_priv) >= 8 || IS_VALLEYVIEW(dev_priv)) {
+	if (INTEL_GEN(i915) >= 8 || IS_VALLEYVIEW(i915)) {
 		/*
 		 * On BDW+, swizzling is not used. We leave the CPU memory
 		 * controller in charge of optimizing memory accesses without
@@ -546,9 +551,9 @@ i915_gem_detect_bit_6_swizzle(struct drm_i915_private *dev_priv)
 		 */
 		swizzle_x = I915_BIT_6_SWIZZLE_NONE;
 		swizzle_y = I915_BIT_6_SWIZZLE_NONE;
-	} else if (INTEL_GEN(dev_priv) >= 6) {
-		if (dev_priv->preserve_bios_swizzle) {
-			if (I915_READ(DISP_ARB_CTL) &
+	} else if (INTEL_GEN(i915) >= 6) {
+		if (i915->preserve_bios_swizzle) {
+			if (intel_uncore_read(uncore, DISP_ARB_CTL) &
 			    DISP_TILE_SURFACE_SWIZZLING) {
 				swizzle_x = I915_BIT_6_SWIZZLE_9_10;
 				swizzle_y = I915_BIT_6_SWIZZLE_9;
@@ -558,15 +563,17 @@ i915_gem_detect_bit_6_swizzle(struct drm_i915_private *dev_priv)
 			}
 		} else {
 			u32 dimm_c0, dimm_c1;
-			dimm_c0 = I915_READ(MAD_DIMM_C0);
-			dimm_c1 = I915_READ(MAD_DIMM_C1);
+			dimm_c0 = intel_uncore_read(uncore, MAD_DIMM_C0);
+			dimm_c1 = intel_uncore_read(uncore, MAD_DIMM_C1);
 			dimm_c0 &= MAD_DIMM_A_SIZE_MASK | MAD_DIMM_B_SIZE_MASK;
 			dimm_c1 &= MAD_DIMM_A_SIZE_MASK | MAD_DIMM_B_SIZE_MASK;
-			/* Enable swizzling when the channels are populated
+			/*
+			 * Enable swizzling when the channels are populated
 			 * with identically sized dimms. We don't need to check
 			 * the 3rd channel because no cpu with gpu attached
 			 * ships in that configuration. Also, swizzling only
-			 * makes sense for 2 channels anyway. */
+			 * makes sense for 2 channels anyway.
+			 */
 			if (dimm_c0 == dimm_c1) {
 				swizzle_x = I915_BIT_6_SWIZZLE_9_10;
 				swizzle_y = I915_BIT_6_SWIZZLE_9;
@@ -575,20 +582,23 @@ i915_gem_detect_bit_6_swizzle(struct drm_i915_private *dev_priv)
 				swizzle_y = I915_BIT_6_SWIZZLE_NONE;
 			}
 		}
-	} else if (IS_GEN(dev_priv, 5)) {
-		/* On Ironlake whatever DRAM config, GPU always do
+	} else if (IS_GEN(i915, 5)) {
+		/*
+		 * On Ironlake whatever DRAM config, GPU always do
 		 * same swizzling setup.
 		 */
 		swizzle_x = I915_BIT_6_SWIZZLE_9_10;
 		swizzle_y = I915_BIT_6_SWIZZLE_9;
-	} else if (IS_GEN(dev_priv, 2)) {
-		/* As far as we know, the 865 doesn't have these bit 6
+	} else if (IS_GEN(i915, 2)) {
+		/*
+		 * As far as we know, the 865 doesn't have these bit 6
 		 * swizzling issues.
 		 */
 		swizzle_x = I915_BIT_6_SWIZZLE_NONE;
 		swizzle_y = I915_BIT_6_SWIZZLE_NONE;
-	} else if (IS_G45(dev_priv) || IS_I965G(dev_priv) || IS_G33(dev_priv)) {
-		/* The 965, G33, and newer, have a very flexible memory
+	} else if (IS_G45(i915) || IS_I965G(i915) || IS_G33(i915)) {
+		/*
+		 * The 965, G33, and newer, have a very flexible memory
 		 * configuration. It will enable dual-channel mode
 		 * (interleaving) on as much memory as it can, and the GPU
 		 * will additionally sometimes enable different bit 6
@@ -614,22 +624,23 @@ i915_gem_detect_bit_6_swizzle(struct drm_i915_private *dev_priv)
 		 * banks of memory are paired and unswizzled on the
 		 * uneven portion, so leave that as unknown.
 		 */
-		if (I915_READ16(C0DRB3) == I915_READ16(C1DRB3)) {
+		if (intel_uncore_read(uncore, C0DRB3) ==
+		    intel_uncore_read(uncore, C1DRB3)) {
 			swizzle_x = I915_BIT_6_SWIZZLE_9_10;
 			swizzle_y = I915_BIT_6_SWIZZLE_9;
 		}
 	} else {
-		u32 dcc;
+		u32 dcc = intel_uncore_read(uncore, DCC);
 
-		/* On 9xx chipsets, channel interleave by the CPU is
+		/*
+		 * On 9xx chipsets, channel interleave by the CPU is
 		 * determined by DCC. For single-channel, neither the CPU
 		 * nor the GPU do swizzling. For dual channel interleaved,
 		 * the GPU's interleave is bit 9 and 10 for X tiled, and bit
 		 * 9 for Y tiled. The CPU's interleave is independent, and
 		 * can be based on either bit 11 (haven't seen this yet) or
 		 * bit 17 (common).
 		 */
-		dcc = I915_READ(DCC);
 		switch (dcc & DCC_ADDRESSING_MODE_MASK) {
 		case DCC_ADDRESSING_MODE_SINGLE_CHANNEL:
 		case DCC_ADDRESSING_MODE_DUAL_CHANNEL_ASYMMETRIC:
@@ -638,7 +649,8 @@ i915_gem_detect_bit_6_swizzle(struct drm_i915_private *dev_priv)
 			break;
 		case DCC_ADDRESSING_MODE_DUAL_CHANNEL_INTERLEAVED:
 			if (dcc & DCC_CHANNEL_XOR_DISABLE) {
-				/* This is the base swizzling by the GPU for
+				/*
+				 * This is the base swizzling by the GPU for
 				 * tiled buffers.
 				 */
 				swizzle_x = I915_BIT_6_SWIZZLE_9_10;
@@ -656,8 +668,8 @@ i915_gem_detect_bit_6_swizzle(struct drm_i915_private *dev_priv)
 	}
 
 	/* check for L-shaped memory aka modified enhanced addressing */
-	if (IS_GEN(dev_priv, 4) &&
-	    !(I915_READ(DCC2) & DCC2_MODIFIED_ENHANCED_DISABLE)) {
+	if (IS_GEN(i915, 4) &&
+	    !(intel_uncore_read(uncore, DCC2) & DCC2_MODIFIED_ENHANCED_DISABLE)) {
 		swizzle_x = I915_BIT_6_SWIZZLE_UNKNOWN;
 		swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN;
 	}
@@ -672,7 +684,8 @@ i915_gem_detect_bit_6_swizzle(struct drm_i915_private *dev_priv)
 
 	if (swizzle_x == I915_BIT_6_SWIZZLE_UNKNOWN ||
 	    swizzle_y == I915_BIT_6_SWIZZLE_UNKNOWN) {
-		/* Userspace likes to explode if it sees unknown swizzling,
+		/*
+		 * Userspace likes to explode if it sees unknown swizzling,
 		 * so lie. We will finish the lie when reporting through
 		 * the get-tiling-ioctl by reporting the physical swizzle
 		 * mode as unknown instead.
@@ -681,13 +694,13 @@ i915_gem_detect_bit_6_swizzle(struct drm_i915_private *dev_priv)
 		 * bit17 dependent, and so we need to also prevent the pages
 		 * from being moved.
 		 */
-		dev_priv->quirks |= QUIRK_PIN_SWIZZLED_PAGES;
+		i915->quirks |= QUIRK_PIN_SWIZZLED_PAGES;
 		swizzle_x = I915_BIT_6_SWIZZLE_NONE;
 		swizzle_y = I915_BIT_6_SWIZZLE_NONE;
 	}
 
-	dev_priv->mm.bit_6_swizzle_x = swizzle_x;
-	dev_priv->mm.bit_6_swizzle_y = swizzle_y;
+	i915->mm.bit_6_swizzle_x = swizzle_x;
+	i915->mm.bit_6_swizzle_y = swizzle_y;
 }
 
 /*
