Linux-6.18.2/drivers/gpu/drm/xe/xe_tlb_inval.c
// SPDX-License-Identifier: MIT
/*
* Copyright © 2023 Intel Corporation
 */

#include <drm/drm_managed.h>

#include "abi/guc_actions_abi.h"
#include "xe_device.h"
#include "xe_force_wake.h"
#include "xe_gt.h"
#include "xe_gt_printk.h"
#include "xe_gt_stats.h"
#include "xe_guc.h"
#include "xe_guc_ct.h"
#include "xe_guc_tlb_inval.h"
#include "xe_mmio.h"
#include "xe_pm.h"
#include "xe_tlb_inval.h"
#include "xe_trace.h"
/**
* DOC: Xe TLB invalidation
*
* Xe TLB invalidation is implemented in two layers. The first is the frontend
* API, which provides an interface for TLB invalidations to the driver code.
* The frontend handles seqno assignment, synchronization (fences), and the
* timeout mechanism. The frontend is implemented via an embedded structure
 * xe_tlb_inval that includes a set of ops hooking into the backend. The
 * second layer is the backend, which interacts with the hardware (or
 * firmware) to perform the actual invalidation.
*/
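
/*
 * A sketch of the backend contract as it is exercised from this file; the
 * authoritative definition lives in the xe_tlb_inval headers, and the exact
 * types (e.g. of the seqno) are illustrative here. Each op receives the
 * invalidation client and, for the invalidation ops, the seqno assigned by
 * the frontend:
 *
 *	struct xe_tlb_inval_ops {
 *		int (*all)(struct xe_tlb_inval *tlb_inval, u32 seqno);
 *		int (*ggtt)(struct xe_tlb_inval *tlb_inval, u32 seqno);
 *		int (*ppgtt)(struct xe_tlb_inval *tlb_inval, u32 seqno,
 *			     u64 start, u64 end, u32 asid);
 *		bool (*initialized)(struct xe_tlb_inval *tlb_inval);
 *		void (*flush)(struct xe_tlb_inval *tlb_inval);
 *		long (*timeout_delay)(struct xe_tlb_inval *tlb_inval);
 *	};
 */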
#define FENCE_STACK_BIT DMA_FENCE_FLAG_USER_BITS

static void xe_tlb_inval_fence_fini(struct xe_tlb_inval_fence *fence)
{
if (WARN_ON_ONCE(!fence->tlb_inval))
return;
xe_pm_runtime_put(fence->tlb_inval->xe);
fence->tlb_inval = NULL; /* fini() should be called once */
}

static void
xe_tlb_inval_fence_signal(struct xe_tlb_inval_fence *fence)
{
bool stack = test_bit(FENCE_STACK_BIT, &fence->base.flags);
lockdep_assert_held(&fence->tlb_inval->pending_lock);
list_del(&fence->link);
trace_xe_tlb_inval_fence_signal(fence->tlb_inval->xe, fence);
xe_tlb_inval_fence_fini(fence);
dma_fence_signal(&fence->base);
if (!stack)
dma_fence_put(&fence->base);
}

static void
xe_tlb_inval_fence_signal_unlocked(struct xe_tlb_inval_fence *fence)
{
struct xe_tlb_inval *tlb_inval = fence->tlb_inval;
spin_lock_irq(&tlb_inval->pending_lock);
xe_tlb_inval_fence_signal(fence);
spin_unlock_irq(&tlb_inval->pending_lock);
}
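
/*
 * Fence timeout worker: flush the backend, expire (with -ETIME) every pending
 * fence older than the backend's timeout delay, and re-arm the work while
 * fences remain pending.
 */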
static void xe_tlb_inval_fence_timeout(struct work_struct *work)
{
struct xe_tlb_inval *tlb_inval = container_of(work, struct xe_tlb_inval,
fence_tdr.work);
struct xe_device *xe = tlb_inval->xe;
struct xe_tlb_inval_fence *fence, *next;
long timeout_delay = tlb_inval->ops->timeout_delay(tlb_inval);
tlb_inval->ops->flush(tlb_inval);
spin_lock_irq(&tlb_inval->pending_lock);
list_for_each_entry_safe(fence, next,
&tlb_inval->pending_fences, link) {
s64 since_inval_ms = ktime_ms_delta(ktime_get(),
fence->inval_time);
if (msecs_to_jiffies(since_inval_ms) < timeout_delay)
break;
trace_xe_tlb_inval_fence_timeout(xe, fence);
drm_err(&xe->drm,
"TLB invalidation fence timeout, seqno=%d recv=%d",
fence->seqno, tlb_inval->seqno_recv);
fence->base.error = -ETIME;
xe_tlb_inval_fence_signal(fence);
}
if (!list_empty(&tlb_inval->pending_fences))
queue_delayed_work(system_wq, &tlb_inval->fence_tdr,
timeout_delay);
spin_unlock_irq(&tlb_inval->pending_lock);
}

/**
 * tlb_inval_fini - Clean up TLB invalidation state
 * @drm: DRM device
 * @arg: pointer to the &struct xe_tlb_inval to clean up
 *
 * Signal any pending invalidation fences and cancel the fence timeout worker.
 */
static void tlb_inval_fini(struct drm_device *drm, void *arg)
{
struct xe_tlb_inval *tlb_inval = arg;
xe_tlb_inval_reset(tlb_inval);
}

/**
 * xe_gt_tlb_inval_init_early - Initialize TLB invalidation state
 * @gt: GT structure
 *
 * Initialize TLB invalidation state. This is purely software initialization
 * and should be called once during driver load.
 *
 * Return: 0 on success, negative error code on error.
 */
int xe_gt_tlb_inval_init_early(struct xe_gt *gt)
{
struct xe_device *xe = gt_to_xe(gt);
struct xe_tlb_inval *tlb_inval = &gt->tlb_inval;
int err;
tlb_inval->xe = xe;
tlb_inval->seqno = 1;
INIT_LIST_HEAD(&tlb_inval->pending_fences);
spin_lock_init(&tlb_inval->pending_lock);
spin_lock_init(&tlb_inval->lock);
INIT_DELAYED_WORK(&tlb_inval->fence_tdr, xe_tlb_inval_fence_timeout);
err = drmm_mutex_init(&xe->drm, &tlb_inval->seqno_lock);
if (err)
return err;
	tlb_inval->job_wq = drmm_alloc_ordered_workqueue(&xe->drm,
							 "gt-tlb-inval-job-wq",
							 WQ_MEM_RECLAIM);
if (IS_ERR(tlb_inval->job_wq))
return PTR_ERR(tlb_inval->job_wq);
/* XXX: Blindly setting up backend to GuC */
xe_guc_tlb_inval_init_early(&gt->uc.guc, tlb_inval);
return drmm_add_action_or_reset(&xe->drm, tlb_inval_fini, tlb_inval);
}

/**
 * xe_tlb_inval_reset() - TLB invalidation reset
 * @tlb_inval: TLB invalidation client
 *
 * Signal any pending invalidation fences. Should be called during a GT reset.
 */
void xe_tlb_inval_reset(struct xe_tlb_inval *tlb_inval)
{
struct xe_tlb_inval_fence *fence, *next;
int pending_seqno;
	/*
	 * We can get here before the backends are even initialized if we're
	 * wedging very early, in which case there are not going to be any
	 * pending fences, so we can bail immediately.
	 */
if (!tlb_inval->ops->initialized(tlb_inval))
return;
/*
* Backend is already disabled at this point. No new TLB requests can
* appear.
*/
mutex_lock(&tlb_inval->seqno_lock);
spin_lock_irq(&tlb_inval->pending_lock);
cancel_delayed_work(&tlb_inval->fence_tdr);
	/*
	 * We might have various kworkers waiting for TLB flushes to complete
	 * which are not tracked with an explicit TLB fence; however, at this
	 * stage that will never happen since the backend is already disabled.
	 * Make sure we signal them here under the assumption that we have
	 * completed a full GT reset.
	 */
if (tlb_inval->seqno == 1)
pending_seqno = TLB_INVALIDATION_SEQNO_MAX - 1;
else
pending_seqno = tlb_inval->seqno - 1;
WRITE_ONCE(tlb_inval->seqno_recv, pending_seqno);
list_for_each_entry_safe(fence, next,
&tlb_inval->pending_fences, link)
xe_tlb_inval_fence_signal(fence);
spin_unlock_irq(&tlb_inval->pending_lock);
mutex_unlock(&tlb_inval->seqno_lock);
}
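
/*
 * Seqnos are allocated from [1, TLB_INVALIDATION_SEQNO_MAX) and wrap around,
 * so "past" is decided with modular arithmetic rather than a plain compare.
 * As a worked example: with seqno == 3 and seqno_recv ==
 * TLB_INVALIDATION_SEQNO_MAX - 2, the delta is hugely negative, meaning seqno
 * 3 is a post-wrap seqno that has not been received yet (not past); with the
 * values swapped the delta is hugely positive, meaning seqno_recv has wrapped
 * past the fence's seqno, which is therefore considered past.
 */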
static bool xe_tlb_inval_seqno_past(struct xe_tlb_inval *tlb_inval, int seqno)
{
int seqno_recv = READ_ONCE(tlb_inval->seqno_recv);
lockdep_assert_held(&tlb_inval->pending_lock);
if (seqno - seqno_recv < -(TLB_INVALIDATION_SEQNO_MAX / 2))
return false;
if (seqno - seqno_recv > (TLB_INVALIDATION_SEQNO_MAX / 2))
return true;
return seqno_recv >= seqno;
}

static void xe_tlb_inval_fence_prep(struct xe_tlb_inval_fence *fence)
{
struct xe_tlb_inval *tlb_inval = fence->tlb_inval;
fence->seqno = tlb_inval->seqno;
trace_xe_tlb_inval_fence_send(tlb_inval->xe, fence);
spin_lock_irq(&tlb_inval->pending_lock);
fence->inval_time = ktime_get();
list_add_tail(&fence->link, &tlb_inval->pending_fences);
if (list_is_singular(&tlb_inval->pending_fences))
queue_delayed_work(system_wq, &tlb_inval->fence_tdr,
tlb_inval->ops->timeout_delay(tlb_inval));
spin_unlock_irq(&tlb_inval->pending_lock);
tlb_inval->seqno = (tlb_inval->seqno + 1) %
TLB_INVALIDATION_SEQNO_MAX;
if (!tlb_inval->seqno)
tlb_inval->seqno = 1;
}
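
/*
 * Note that the wrap in xe_tlb_inval_fence_prep() skips seqno 0; presumably
 * so that a zero-initialized seqno_recv can never alias a seqno that was
 * actually assigned to a fence.
 */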
#define xe_tlb_inval_issue(__tlb_inval, __fence, op, args...) \
({ \
int __ret; \
\
xe_assert((__tlb_inval)->xe, (__tlb_inval)->ops); \
xe_assert((__tlb_inval)->xe, (__fence)); \
\
mutex_lock(&(__tlb_inval)->seqno_lock); \
xe_tlb_inval_fence_prep((__fence)); \
__ret = op((__tlb_inval), (__fence)->seqno, ##args); \
if (__ret < 0) \
xe_tlb_inval_fence_signal_unlocked((__fence)); \
mutex_unlock(&(__tlb_inval)->seqno_lock); \
\
__ret == -ECANCELED ? 0 : __ret; \
})
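
/*
 * Note the -ECANCELED mapping above: when a backend op fails, the fence is
 * signaled immediately so waiters are never stranded, and -ECANCELED is
 * reported to the caller as success; presumably the backend returns it when
 * no invalidation was required (e.g. the backend is disabled across a
 * reset), in which case the already-signaled fence is all the caller needs.
 */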
/**
* xe_tlb_inval_all() - Issue a TLB invalidation for all TLBs
* @tlb_inval: TLB invalidation client
 * @fence: invalidation fence which will be signaled on TLB invalidation
 * completion
 *
 * Issue a TLB invalidation for all TLBs. Completion of the TLB invalidation is
 * asynchronous; the caller can use the invalidation fence to wait for
 * completion.
*
* Return: 0 on success, negative error code on error
*/
int xe_tlb_inval_all(struct xe_tlb_inval *tlb_inval,
struct xe_tlb_inval_fence *fence)
{
return xe_tlb_inval_issue(tlb_inval, fence, tlb_inval->ops->all);
}
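
/*
 * A minimal usage sketch (illustrative, not a call site in the driver):
 * issue a full invalidation against a stack fence and wait for it
 * synchronously, given a client such as &gt->tlb_inval:
 *
 *	struct xe_tlb_inval_fence fence;
 *	int err;
 *
 *	xe_tlb_inval_fence_init(tlb_inval, &fence, true);
 *	err = xe_tlb_inval_all(tlb_inval, &fence);
 *	xe_tlb_inval_fence_wait(&fence);
 *
 * Waiting is safe even when err is non-zero, since xe_tlb_inval_issue()
 * signals the fence on the error path as well.
 */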
/**
* xe_tlb_inval_ggtt() - Issue a TLB invalidation for the GGTT
* @tlb_inval: TLB invalidation client
*
 * Issue a TLB invalidation for the GGTT and wait for completion. Unlike most
 * of the invalidation entry points, this function is synchronous: it waits on
 * an internal stack fence before returning.
*
* Return: 0 on success, negative error code on error
*/
int xe_tlb_inval_ggtt(struct xe_tlb_inval *tlb_inval)
{
struct xe_tlb_inval_fence fence, *fence_ptr = &fence;
int ret;
xe_tlb_inval_fence_init(tlb_inval, fence_ptr, true);
ret = xe_tlb_inval_issue(tlb_inval, fence_ptr, tlb_inval->ops->ggtt);
xe_tlb_inval_fence_wait(fence_ptr);
return ret;
}

/**
 * xe_tlb_inval_range() - Issue a TLB invalidation for an address range
 * @tlb_inval: TLB invalidation client
 * @fence: invalidation fence which will be signaled on TLB invalidation
 * completion
 * @start: start address
 * @end: end address
 * @asid: address space id
 *
 * Issue a range-based TLB invalidation if supported; if not, fall back to a
 * full TLB invalidation. Completion of the TLB invalidation is asynchronous;
 * the caller can use the invalidation fence to wait for completion.
 *
 * Return: 0 on success, negative error code on error
 */
int xe_tlb_inval_range(struct xe_tlb_inval *tlb_inval,
struct xe_tlb_inval_fence *fence, u64 start, u64 end,
u32 asid)
{
return xe_tlb_inval_issue(tlb_inval, fence, tlb_inval->ops->ppgtt,
start, end, asid);
}

/**
 * xe_tlb_inval_vm() - Issue a TLB invalidation for a VM
 * @tlb_inval: TLB invalidation client
 * @vm: VM to invalidate
 *
 * Invalidate the entire address space of a VM. This function is synchronous:
 * it waits on an internal stack fence before returning.
 */
void xe_tlb_inval_vm(struct xe_tlb_inval *tlb_inval, struct xe_vm *vm)
{
struct xe_tlb_inval_fence fence;
u64 range = 1ull << vm->xe->info.va_bits;
xe_tlb_inval_fence_init(tlb_inval, &fence, true);
xe_tlb_inval_range(tlb_inval, &fence, 0, range, vm->usm.asid);
xe_tlb_inval_fence_wait(&fence);
}

/**
* xe_tlb_inval_done_handler() - TLB invalidation done handler
* @tlb_inval: TLB invalidation client
* @seqno: seqno of invalidation that is done
*
* Update recv seqno, signal any TLB invalidation fences, and restart TDR
*/
void xe_tlb_inval_done_handler(struct xe_tlb_inval *tlb_inval, int seqno)
{
struct xe_device *xe = tlb_inval->xe;
struct xe_tlb_inval_fence *fence, *next;
unsigned long flags;
	/*
	 * This can be run both directly from the IRQ handler and from
	 * process_g2h_msg(). Only one of the two may process any individual CT
	 * message; however, the order in which they are processed here could
	 * result in skipping a seqno. To handle that we simply process all the
	 * seqnos from the last seqno_recv up to and including the one in
	 * msg[0]: for example, if seqno_recv is 5 and the G2H for seqno 7 is
	 * processed first, the fences for seqnos 6 and 7 are both signaled
	 * below. The delta should be very small, so there shouldn't be many
	 * pending fences we actually need to iterate over here.
	 *
	 * From the GuC's POV we expect the seqnos to always appear in order,
	 * so if we see something later in the timeline we can be sure that
	 * anything appearing earlier has already signalled; we just have yet
	 * to officially process the CT message, as when racing against
	 * process_g2h_msg().
	 */
spin_lock_irqsave(&tlb_inval->pending_lock, flags);
if (xe_tlb_inval_seqno_past(tlb_inval, seqno)) {
spin_unlock_irqrestore(&tlb_inval->pending_lock, flags);
return;
}
WRITE_ONCE(tlb_inval->seqno_recv, seqno);
list_for_each_entry_safe(fence, next,
&tlb_inval->pending_fences, link) {
trace_xe_tlb_inval_fence_recv(xe, fence);
if (!xe_tlb_inval_seqno_past(tlb_inval, fence->seqno))
break;
xe_tlb_inval_fence_signal(fence);
}
if (!list_empty(&tlb_inval->pending_fences))
mod_delayed_work(system_wq,
&tlb_inval->fence_tdr,
tlb_inval->ops->timeout_delay(tlb_inval));
else
cancel_delayed_work(&tlb_inval->fence_tdr);
spin_unlock_irqrestore(&tlb_inval->pending_lock, flags);
}

static const char *
xe_inval_fence_get_driver_name(struct dma_fence *dma_fence)
{
return "xe";
}

static const char *
xe_inval_fence_get_timeline_name(struct dma_fence *dma_fence)
{
return "tlb_inval_fence";
}

static const struct dma_fence_ops inval_fence_ops = {
.get_driver_name = xe_inval_fence_get_driver_name,
.get_timeline_name = xe_inval_fence_get_timeline_name,
};

/**
 * xe_tlb_inval_fence_init() - Initialize TLB invalidation fence
 * @tlb_inval: TLB invalidation client
 * @fence: TLB invalidation fence to initialize
 * @stack: fence is a stack variable
 *
 * Initialize a TLB invalidation fence for use. xe_tlb_inval_fence_fini() is
 * called automatically when the fence is signaled, and every fence is
 * guaranteed to signal, even on error.
 */
void xe_tlb_inval_fence_init(struct xe_tlb_inval *tlb_inval,
struct xe_tlb_inval_fence *fence,
bool stack)
{
xe_pm_runtime_get_noresume(tlb_inval->xe);
spin_lock_irq(&tlb_inval->lock);
dma_fence_init(&fence->base, &inval_fence_ops, &tlb_inval->lock,
dma_fence_context_alloc(1), 1);
spin_unlock_irq(&tlb_inval->lock);
INIT_LIST_HEAD(&fence->link);
if (stack)
set_bit(FENCE_STACK_BIT, &fence->base.flags);
else
dma_fence_get(&fence->base);
fence->tlb_inval = tlb_inval;
}
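
/*
 * A lifetime sketch (illustrative): with @stack == true, as in
 * xe_tlb_inval_ggtt() and xe_tlb_inval_vm() above, the caller keeps the fence
 * alive until it signals and no reference counting is done. With @stack ==
 * false the frontend takes an extra reference for its pending list and drops
 * it at signal time, so a heap-allocated fence can be handed to other code as
 * a plain &fence->base dma_fence; the caller drops its own initial reference
 * with dma_fence_put() when it is done with the fence.
 */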