summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/i915/selftests
diff options
context:
space:
mode:
authorChris Wilson <chris@chris-wilson.co.uk>2019-01-25 13:22:28 +0000
committerChris Wilson <chris@chris-wilson.co.uk>2019-01-25 14:27:22 +0000
commiteb8d0f5af4ec2d172baf8b4b9a2199cd916b4e54 (patch)
tree28293a5cdfd09863ce764d181c5039cce25b79a2 /drivers/gpu/drm/i915/selftests
parentfe62365f9f80a1c1d438c54fba21f5108a182de8 (diff)
drm/i915: Remove GPU reset dependence on struct_mutex
Now that the submission backends are controlled via their own spinlocks, with a wave of a magic wand we can lift the struct_mutex requirement around GPU reset. That is we allow the submission frontend (userspace) to keep on submitting while we process the GPU reset as we can suspend the backend independently. The major change is around the backoff/handoff strategy for performing the reset. With no mutex deadlock, we no longer have to coordinate with any waiter, and just perform the reset immediately. Testcase: igt/gem_mmap_gtt/hang # regresses Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20190125132230.22221-3-chris@chris-wilson.co.uk
Diffstat (limited to 'drivers/gpu/drm/i915/selftests')
-rw-r--r--drivers/gpu/drm/i915/selftests/intel_hangcheck.c57
-rw-r--r--drivers/gpu/drm/i915/selftests/intel_workarounds.c3
-rw-r--r--drivers/gpu/drm/i915/selftests/mock_gem_device.c4
3 files changed, 31 insertions, 33 deletions
diff --git a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
index 12550b55c42f..67431355cd6e 100644
--- a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
+++ b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
@@ -363,9 +363,7 @@ static int igt_global_reset(void *arg)
/* Check that we can issue a global GPU reset */
igt_global_reset_lock(i915);
- set_bit(I915_RESET_HANDOFF, &i915->gpu_error.flags);
- mutex_lock(&i915->drm.struct_mutex);
reset_count = i915_reset_count(&i915->gpu_error);
i915_reset(i915, ALL_ENGINES, NULL);
@@ -374,9 +372,7 @@ static int igt_global_reset(void *arg)
pr_err("No GPU reset recorded!\n");
err = -EINVAL;
}
- mutex_unlock(&i915->drm.struct_mutex);
- GEM_BUG_ON(test_bit(I915_RESET_HANDOFF, &i915->gpu_error.flags));
igt_global_reset_unlock(i915);
if (i915_terminally_wedged(&i915->gpu_error))
@@ -399,9 +395,7 @@ static int igt_wedged_reset(void *arg)
i915_gem_set_wedged(i915);
GEM_BUG_ON(!i915_terminally_wedged(&i915->gpu_error));
- set_bit(I915_RESET_HANDOFF, &i915->gpu_error.flags);
i915_reset(i915, ALL_ENGINES, NULL);
- GEM_BUG_ON(test_bit(I915_RESET_HANDOFF, &i915->gpu_error.flags));
intel_runtime_pm_put(i915, wakeref);
mutex_unlock(&i915->drm.struct_mutex);
@@ -511,7 +505,7 @@ static int __igt_reset_engine(struct drm_i915_private *i915, bool active)
break;
}
- if (!wait_for_idle(engine)) {
+ if (!i915_reset_flush(i915)) {
struct drm_printer p =
drm_info_printer(i915->drm.dev);
@@ -903,20 +897,13 @@ static int igt_reset_engines(void *arg)
return 0;
}
-static u32 fake_hangcheck(struct i915_request *rq, u32 mask)
+static u32 fake_hangcheck(struct drm_i915_private *i915, u32 mask)
{
- struct i915_gpu_error *error = &rq->i915->gpu_error;
- u32 reset_count = i915_reset_count(error);
-
- error->stalled_mask = mask;
-
- /* set_bit() must be after we have setup the backchannel (mask) */
- smp_mb__before_atomic();
- set_bit(I915_RESET_HANDOFF, &error->flags);
+ u32 count = i915_reset_count(&i915->gpu_error);
- wake_up_all(&error->wait_queue);
+ i915_reset(i915, mask, NULL);
- return reset_count;
+ return count;
}
static int igt_reset_wait(void *arg)
@@ -962,7 +949,7 @@ static int igt_reset_wait(void *arg)
goto out_rq;
}
- reset_count = fake_hangcheck(rq, ALL_ENGINES);
+ reset_count = fake_hangcheck(i915, ALL_ENGINES);
timeout = i915_request_wait(rq, I915_WAIT_LOCKED, 10);
if (timeout < 0) {
@@ -972,7 +959,6 @@ static int igt_reset_wait(void *arg)
goto out_rq;
}
- GEM_BUG_ON(test_bit(I915_RESET_HANDOFF, &i915->gpu_error.flags));
if (i915_reset_count(&i915->gpu_error) == reset_count) {
pr_err("No GPU reset recorded!\n");
err = -EINVAL;
@@ -1162,7 +1148,7 @@ static int __igt_reset_evict_vma(struct drm_i915_private *i915,
}
out_reset:
- fake_hangcheck(rq, intel_engine_flag(rq->engine));
+ fake_hangcheck(rq->i915, intel_engine_flag(rq->engine));
if (tsk) {
struct igt_wedge_me w;
@@ -1341,12 +1327,7 @@ static int igt_reset_queue(void *arg)
goto fini;
}
- reset_count = fake_hangcheck(prev, ENGINE_MASK(id));
-
- i915_reset(i915, ENGINE_MASK(id), NULL);
-
- GEM_BUG_ON(test_bit(I915_RESET_HANDOFF,
- &i915->gpu_error.flags));
+ reset_count = fake_hangcheck(i915, ENGINE_MASK(id));
if (prev->fence.error != -EIO) {
pr_err("GPU reset not recorded on hanging request [fence.error=%d]!\n",
@@ -1565,6 +1546,7 @@ static int igt_atomic_reset_engine(struct intel_engine_cs *engine,
pr_err("%s(%s): Failed to start request %llx, at %x\n",
__func__, engine->name,
rq->fence.seqno, hws_seqno(&h, rq));
+ i915_gem_set_wedged(i915);
err = -EIO;
}
@@ -1588,7 +1570,6 @@ out:
static void force_reset(struct drm_i915_private *i915)
{
i915_gem_set_wedged(i915);
- set_bit(I915_RESET_HANDOFF, &i915->gpu_error.flags);
i915_reset(i915, 0, NULL);
}
@@ -1618,6 +1599,26 @@ static int igt_atomic_reset(void *arg)
if (i915_terminally_wedged(&i915->gpu_error))
goto unlock;
+ if (intel_has_gpu_reset(i915)) {
+ const typeof(*phases) *p;
+
+ for (p = phases; p->name; p++) {
+ GEM_TRACE("intel_gpu_reset under %s\n", p->name);
+
+ p->critical_section_begin();
+ err = intel_gpu_reset(i915, ALL_ENGINES);
+ p->critical_section_end();
+
+ if (err) {
+ pr_err("intel_gpu_reset failed under %s\n",
+ p->name);
+ goto out;
+ }
+ }
+
+ force_reset(i915);
+ }
+
if (intel_has_reset_engine(i915)) {
struct intel_engine_cs *engine;
enum intel_engine_id id;
diff --git a/drivers/gpu/drm/i915/selftests/intel_workarounds.c b/drivers/gpu/drm/i915/selftests/intel_workarounds.c
index a8cac56be835..b15c4f26c593 100644
--- a/drivers/gpu/drm/i915/selftests/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/selftests/intel_workarounds.c
@@ -214,7 +214,6 @@ out_put:
static int do_device_reset(struct intel_engine_cs *engine)
{
- set_bit(I915_RESET_HANDOFF, &engine->i915->gpu_error.flags);
i915_reset(engine->i915, ENGINE_MASK(engine->id), "live_workarounds");
return 0;
}
@@ -394,7 +393,6 @@ static int
live_gpu_reset_gt_engine_workarounds(void *arg)
{
struct drm_i915_private *i915 = arg;
- struct i915_gpu_error *error = &i915->gpu_error;
intel_wakeref_t wakeref;
struct wa_lists lists;
bool ok;
@@ -413,7 +411,6 @@ live_gpu_reset_gt_engine_workarounds(void *arg)
if (!ok)
goto out;
- set_bit(I915_RESET_HANDOFF, &error->flags);
i915_reset(i915, ALL_ENGINES, "live_workarounds");
ok = verify_gt_engine_wa(i915, &lists, "after reset");
diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
index 5477ad4a7e7d..8ab5a2688a0c 100644
--- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c
+++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
@@ -58,8 +58,8 @@ static void mock_device_release(struct drm_device *dev)
i915_gem_contexts_lost(i915);
mutex_unlock(&i915->drm.struct_mutex);
- cancel_delayed_work_sync(&i915->gt.retire_work);
- cancel_delayed_work_sync(&i915->gt.idle_work);
+ drain_delayed_work(&i915->gt.retire_work);
+ drain_delayed_work(&i915->gt.idle_work);
i915_gem_drain_workqueue(i915);
mutex_lock(&i915->drm.struct_mutex);