drm/amdgpu: Skip poison aca bank from UE channel

[ Upstream commit 8e8e08c831 ]

Avoid logging GFX poison consumption errors when a fatal error occurs.

Signed-off-by: Xiang Liu <xiang.liu@amd.com>
Reviewed-by: Tao Zhou <tao.zhou1@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
This commit is contained in:
Xiang Liu
2025-07-30 11:07:43 +08:00
committed by Greg Kroah-Hartman
parent eb553214dc
commit 51ece8d2a9

View File

@@ -132,6 +132,27 @@ static void aca_smu_bank_dump(struct amdgpu_device *adev, int idx, int total, st
idx + 1, total, aca_regs[i].name, bank->regs[aca_regs[i].reg_idx]);
}
/*
 * Tell whether an ACA bank was reported by the hardware IP described by @type.
 *
 * @bank: bank whose IPID register is decoded; a NULL pointer yields false.
 * @type: index into aca_hwid_mcatypes[]; ACA_HWIP_TYPE_UNKNOW yields false.
 *
 * Returns true only when both the hardware ID and the MCA type decoded from
 * the bank's IPID register equal the values stored in the table entry for
 * @type. A table entry whose hwid is zero never matches.
 */
static bool aca_bank_hwip_is_matched(struct aca_bank *bank, enum aca_hwip_type type)
{
	struct aca_hwip *entry;
	int bank_hwid, bank_mcatype;
	u64 ipid_val;

	if (!bank)
		return false;

	if (type == ACA_HWIP_TYPE_UNKNOW)
		return false;

	entry = &aca_hwid_mcatypes[type];
	/* A zero hwid in the table entry is treated as "no match possible". */
	if (!entry->hwid)
		return false;

	/* Decode the reporting IP's identity from the bank's IPID register. */
	ipid_val = bank->regs[ACA_REG_IDX_IPID];
	bank_hwid = ACA_REG__IPID__HARDWAREID(ipid_val);
	bank_mcatype = ACA_REG__IPID__MCATYPE(ipid_val);

	if (entry->hwid != bank_hwid)
		return false;

	return entry->mcatype == bank_mcatype;
}
static int aca_smu_get_valid_aca_banks(struct amdgpu_device *adev, enum aca_smu_type type,
int start, int count,
struct aca_banks *banks, struct ras_query_context *qctx)
@@ -170,6 +191,15 @@ static int aca_smu_get_valid_aca_banks(struct amdgpu_device *adev, enum aca_smu_
bank.type = type;
/*
* Poison may be consumed when a UE is injected while background workloads
* are running. Such poison banks on the UE channel are unexpected, so skip
* them unless they were reported by UMC.
*/
if (type == ACA_SMU_TYPE_UE &&
ACA_REG__STATUS__POISON(bank.regs[ACA_REG_IDX_STATUS]) &&
!aca_bank_hwip_is_matched(&bank, ACA_HWIP_TYPE_UMC))
continue;
aca_smu_bank_dump(adev, i, count, &bank, qctx);
ret = aca_banks_add_bank(banks, &bank);
@@ -180,27 +210,6 @@ static int aca_smu_get_valid_aca_banks(struct amdgpu_device *adev, enum aca_smu_
return 0;
}
/*
 * Tell whether an ACA bank was reported by the hardware IP described by @type.
 *
 * @bank: bank whose IPID register is decoded; a NULL pointer yields false.
 * @type: index into aca_hwid_mcatypes[]; ACA_HWIP_TYPE_UNKNOW yields false.
 *
 * Returns true only when both the hardware ID and the MCA type decoded from
 * the bank's IPID register equal the values stored in the table entry for
 * @type. A table entry whose hwid is zero never matches.
 */
static bool aca_bank_hwip_is_matched(struct aca_bank *bank, enum aca_hwip_type type)
{
	struct aca_hwip *entry;
	int bank_hwid, bank_mcatype;
	u64 ipid_val;

	if (!bank)
		return false;

	if (type == ACA_HWIP_TYPE_UNKNOW)
		return false;

	entry = &aca_hwid_mcatypes[type];
	/* A zero hwid in the table entry is treated as "no match possible". */
	if (!entry->hwid)
		return false;

	/* Decode the reporting IP's identity from the bank's IPID register. */
	ipid_val = bank->regs[ACA_REG_IDX_IPID];
	bank_hwid = ACA_REG__IPID__HARDWAREID(ipid_val);
	bank_mcatype = ACA_REG__IPID__MCATYPE(ipid_val);

	if (entry->hwid != bank_hwid)
		return false;

	return entry->mcatype == bank_mcatype;
}
static bool aca_bank_is_valid(struct aca_handle *handle, struct aca_bank *bank, enum aca_smu_type type)
{
const struct aca_bank_ops *bank_ops = handle->bank_ops;