drm/amdkfd: Handle errors from svm validate and map
If a new range is split into multiple pranges, aligned to max_svm_range_pages
and added to the update_list, svm validate and map should keep going after an
error to make sure the prange->mapped_to_gpu flag is up to date for the whole
range.
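
For illustration, a condensed sketch of the splitting step (modeled on the
svm_range_split_new() pattern in kfd_svm.c; error unwinding and the
insert_list are omitted here):

	/* Chop [start, last] into pranges of at most max_pages each,
	 * ending on max_pages-aligned boundaries, and queue every piece
	 * on update_list for a later validate-and-map pass.
	 */
	while (last >= start) {
		l = min(last, ALIGN_DOWN(start + max_pages, max_pages) - 1);

		prange = svm_range_new(svms, start, l, true);
		if (!prange)
			return -ENOMEM;
		list_add(&prange->update_list, update_list);

		start = l + 1;
	}

Because each piece is validated and mapped separately, an error on one piece
must not short-circuit the others, or their mapped_to_gpu state would go
stale.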
svm validate and map sets prange->mapped_to_gpu after mapping to the GPUs
succeeds; otherwise it clears prange->mapped_to_gpu (for the update-mapping
case) instead of setting an error flag. The now-redundant error flag can be
removed to simplify the code.
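
The validity check thus collapses from two fields to one. A condensed
before/after view (the "unchanged" variable is hypothetical, for illustration
only):

	/* Before: valid only if attributes match and no error was
	 * recorded by the last validate-and-map pass.
	 */
	unchanged = svm_range_is_same_attrs(p, prange, nattr, attrs) &&
		    !prange->is_error_flag;

	/* After: mapped_to_gpu alone says whether another
	 * validate-and-map pass is needed.
	 */
	unchanged = svm_range_is_same_attrs(p, prange, nattr, attrs) &&
		    prange->mapped_to_gpu;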
Refactor to remove the gotos and to update the prange->mapped_to_gpu flag
inside svm_range_lock, to guarantee that we always evict queues or unmap from
the GPUs if there are invalid ranges.
After svm validate and map returns -EAGAIN, the caller's retry will update the
mapping for the whole range again.
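
The contract the caller relies on can be sketched as a retry loop
(illustrative only; in the driver the retry is driven by the restore worker
and GPU retry faults, not a literal loop):

	do {
		/* Revalidate and remap the whole range; -EAGAIN means
		 * the range changed underneath us and a fresh pass is
		 * required.
		 */
		r = svm_range_validate_and_map(mm, prange, MAX_GPU_INSTANCE,
					       true, true, flush_tlb);
	} while (r == -EAGAIN);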
Fixes: c22b044070 ("drm/amdkfd: flag added to handle errors from svm validate and map")
Signed-off-by: Philip Yang <Philip.Yang@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Tested-by: James Zhu <james.zhu@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -829,7 +829,7 @@ svm_range_is_same_attrs(struct kfd_process *p, struct svm_range *prange,
 		}
 	}
 
-	return !prange->is_error_flag;
+	return true;
 }
 
 /**
@@ -1679,71 +1679,66 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
 
 	start = prange->start << PAGE_SHIFT;
 	end = (prange->last + 1) << PAGE_SHIFT;
-	for (addr = start; addr < end && !r; ) {
+	for (addr = start; !r && addr < end; ) {
 		struct hmm_range *hmm_range;
 		struct vm_area_struct *vma;
-		unsigned long next;
+		unsigned long next = 0;
 		unsigned long offset;
 		unsigned long npages;
 		bool readonly;
 
 		vma = vma_lookup(mm, addr);
-		if (!vma) {
+		if (vma) {
+			readonly = !(vma->vm_flags & VM_WRITE);
+
+			next = min(vma->vm_end, end);
+			npages = (next - addr) >> PAGE_SHIFT;
+			WRITE_ONCE(p->svms.faulting_task, current);
+			r = amdgpu_hmm_range_get_pages(&prange->notifier, addr, npages,
+						       readonly, owner, NULL,
+						       &hmm_range);
+			WRITE_ONCE(p->svms.faulting_task, NULL);
+			if (r) {
+				pr_debug("failed %d to get svm range pages\n", r);
+				if (r == -EBUSY)
+					r = -EAGAIN;
+			}
+		} else {
 			r = -EFAULT;
-			goto unreserve_out;
-		}
-		readonly = !(vma->vm_flags & VM_WRITE);
-
-		next = min(vma->vm_end, end);
-		npages = (next - addr) >> PAGE_SHIFT;
-		WRITE_ONCE(p->svms.faulting_task, current);
-		r = amdgpu_hmm_range_get_pages(&prange->notifier, addr, npages,
-					       readonly, owner, NULL,
-					       &hmm_range);
-		WRITE_ONCE(p->svms.faulting_task, NULL);
-		if (r) {
-			pr_debug("failed %d to get svm range pages\n", r);
-			if (r == -EBUSY)
-				r = -EAGAIN;
-			goto unreserve_out;
 		}
 
-		offset = (addr - start) >> PAGE_SHIFT;
-		r = svm_range_dma_map(prange, ctx->bitmap, offset, npages,
-				      hmm_range->hmm_pfns);
-		if (r) {
-			pr_debug("failed %d to dma map range\n", r);
-			goto unreserve_out;
+		if (!r) {
+			offset = (addr - start) >> PAGE_SHIFT;
+			r = svm_range_dma_map(prange, ctx->bitmap, offset, npages,
+					      hmm_range->hmm_pfns);
+			if (r)
+				pr_debug("failed %d to dma map range\n", r);
 		}
 
 		svm_range_lock(prange);
-		if (amdgpu_hmm_range_get_pages_done(hmm_range)) {
+		if (!r && amdgpu_hmm_range_get_pages_done(hmm_range)) {
 			pr_debug("hmm update the range, need validate again\n");
 			r = -EAGAIN;
-			goto unlock_out;
 		}
-		if (!list_empty(&prange->child_list)) {
+
+		if (!r && !list_empty(&prange->child_list)) {
 			pr_debug("range split by unmap in parallel, validate again\n");
 			r = -EAGAIN;
-			goto unlock_out;
 		}
 
-		r = svm_range_map_to_gpus(prange, offset, npages, readonly,
-					  ctx->bitmap, wait, flush_tlb);
+		if (!r)
+			r = svm_range_map_to_gpus(prange, offset, npages, readonly,
+						  ctx->bitmap, wait, flush_tlb);
+
+		if (!r && next == end)
+			prange->mapped_to_gpu = true;
 
-unlock_out:
 		svm_range_unlock(prange);
 
 		addr = next;
 	}
 
-	if (addr == end)
-		prange->mapped_to_gpu = true;
-
-unreserve_out:
 	svm_range_unreserve_bos(ctx);
-
-	prange->is_error_flag = !!r;
 	if (!r)
 		prange->validate_timestamp = ktime_get_boottime();
 
@@ -2112,7 +2107,8 @@ svm_range_add(struct kfd_process *p, uint64_t start, uint64_t size,
 		next = interval_tree_iter_next(node, start, last);
 		next_start = min(node->last, last) + 1;
 
-		if (svm_range_is_same_attrs(p, prange, nattr, attrs)) {
+		if (svm_range_is_same_attrs(p, prange, nattr, attrs) &&
+		    prange->mapped_to_gpu) {
 			/* nothing to do */
 		} else if (node->start < start || node->last > last) {
 			/* node intersects the update range and its attributes
@@ -3525,7 +3521,7 @@ svm_range_set_attr(struct kfd_process *p, struct mm_struct *mm,
 	struct svm_range *next;
 	bool update_mapping = false;
 	bool flush_tlb;
-	int r = 0;
+	int r, ret = 0;
 
 	pr_debug("pasid 0x%x svms 0x%p [0x%llx 0x%llx] pages 0x%llx\n",
 		 p->pasid, &p->svms, start, start + size - 1, size);
@@ -3613,7 +3609,7 @@ svm_range_set_attr(struct kfd_process *p, struct mm_struct *mm,
 out_unlock_range:
 		mutex_unlock(&prange->migrate_mutex);
 		if (r)
-			break;
+			ret = r;
 	}
 
 	dynamic_svm_range_dump(svms);
@@ -3626,7 +3622,7 @@ out:
 	pr_debug("pasid 0x%x svms 0x%p [0x%llx 0x%llx] done, r=%d\n", p->pasid,
 		 &p->svms, start, start + size - 1, r);
 
-	return r;
+	return ret ? ret : r;
 }
 
 static int
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
@@ -133,7 +133,6 @@ struct svm_range {
 	DECLARE_BITMAP(bitmap_access, MAX_GPU_INSTANCE);
 	DECLARE_BITMAP(bitmap_aip, MAX_GPU_INSTANCE);
 	bool				mapped_to_gpu;
-	bool				is_error_flag;
 };
 
 static inline void svm_range_lock(struct svm_range *prange)