Skip to content

Commit 720e734

Browse files
Muralidhara M K authored and Avadhut Naik committed
rasdaemon: Handle reassigned bit definitions for UMC bank
On some AMD systems, some of the existing bit definitions in the CTL register of an SMCA bank type have been reassigned without a new HWID and McaType being defined. Consequently, errors whose bit definitions have been reassigned in the CTL register are decoded erroneously. Add a new error description structure to compensate for the reassigned bit definitions by introducing a new software-defined SMCA bank type that uses the hardware-reserved values for HWID. The new SMCA bank type is employed only for UMC error decoding on affected models; the existing error description structure for the UMC bank type remains valid. Signed-off-by: Muralidhara M K <[email protected]>
1 parent c285153 commit 720e734

File tree

1 file changed

+28
-2
lines changed

1 file changed

+28
-2
lines changed

mce-amd-smca.c

Lines changed: 28 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -60,6 +60,7 @@ enum smca_bank_types {
6060
SMCA_CS_V2_QUIRK,
6161
SMCA_PIE, /* Power, Interrupts, etc. */
6262
SMCA_UMC, /* Unified Memory Controller */
63+
SMCA_UMC_QUIRK,
6364
SMCA_UMC_V2,
6465
SMCA_MA_LLC, /* Memory Attached Last Level Cache */
6566
SMCA_PB, /* Parameter Block */
@@ -313,6 +314,25 @@ static const char * const smca_umc_mce_desc[] = {
313314
"Read CRC Error",
314315
};
315316

317+
static const char * const smca_umc_quirk_mce_desc[] = {
318+
"DRAM On Die ECC error",
319+
"Data poison error",
320+
"SDP parity error",
321+
"Reserved",
322+
"Address/Command parity error",
323+
"HBM Write data parity error",
324+
"Consolidated SRAM ECC error",
325+
"Reserved",
326+
"Reserved",
327+
"Rdb SRAM ECC error",
328+
"Thermal throttling",
329+
"HBM Read Data Parity error",
330+
"Reserved",
331+
"UMC FW Error",
332+
"SRAM Parity Error",
333+
"HBM CRC Error",
334+
};
335+
316336
static const char * const smca_umc2_mce_desc[] = {
317337
"DRAM ECC error",
318338
"Data poison error",
@@ -642,6 +662,7 @@ static struct smca_mce_desc smca_mce_descs[] = {
642662
[SMCA_CS_V2_QUIRK] = { smca_cs2_quirk_mce_desc, ARRAY_SIZE(smca_cs2_quirk_mce_desc)},
643663
[SMCA_PIE] = { smca_pie_mce_desc, ARRAY_SIZE(smca_pie_mce_desc) },
644664
[SMCA_UMC] = { smca_umc_mce_desc, ARRAY_SIZE(smca_umc_mce_desc) },
665+
[SMCA_UMC_QUIRK] = { smca_umc_quirk_mce_desc, ARRAY_SIZE(smca_umc_quirk_mce_desc) },
645666
[SMCA_UMC_V2] = { smca_umc2_mce_desc, ARRAY_SIZE(smca_umc2_mce_desc) },
646667
[SMCA_MA_LLC] = { smca_mall_mce_desc, ARRAY_SIZE(smca_mall_mce_desc) },
647668
[SMCA_PB] = { smca_pb_mce_desc, ARRAY_SIZE(smca_pb_mce_desc) },
@@ -696,6 +717,7 @@ static struct smca_hwid smca_hwid_mcatypes[] = {
696717

697718
/* Unified Memory Controller MCA type */
698719
{ SMCA_UMC, 0x00000096 },
720+
{ SMCA_UMC_QUIRK, 0x00020000 },
699721
/* Heterogeneous systems may have both UMC and UMC_v2 types on the same node. */
700722
{ SMCA_UMC_V2, 0x00010096 },
701723
/* Memory Attached Last Level Cache */
@@ -764,7 +786,7 @@ static struct smca_bank_name smca_names[] = {
764786
[SMCA_L3_CACHE] = { "L3 Cache" },
765787
[SMCA_CS ... SMCA_CS_V2_QUIRK] = { "Coherent Slave" },
766788
[SMCA_PIE] = { "Power, Interrupts, etc." },
767-
[SMCA_UMC] = { "Unified Memory Controller" },
789+
[SMCA_UMC ... SMCA_UMC_QUIRK] = { "Unified Memory Controller" },
768790
[SMCA_UMC_V2] = { "Unified Memory Controller V2" },
769791
[SMCA_MA_LLC] = { "Memory Attached Last Level Cache" },
770792
[SMCA_PB] = { "Parameter Block" },
@@ -843,6 +865,10 @@ static inline void fixup_hwid(struct mce_priv* m, uint32_t *hwid_mcatype)
843865
if (*hwid_mcatype == 0x0002002E)
844866
*hwid_mcatype = 0x00010000;
845867
break;
868+
case 0x90 ... 0x9F:
869+
if ((*hwid_mcatype & 0xFF) == 0x00000096)
870+
*hwid_mcatype = 0x00020000;
871+
break;
846872
default:
847873
break;
848874
}
@@ -908,7 +934,7 @@ void decode_smca_error(struct mce_event *e, struct mce_priv *m)
908934
smca_mce_descs[bank_type].descs[xec],
909935
xec);
910936

911-
if (bank_type == SMCA_UMC && xec == 0) {
937+
if ((bank_type == SMCA_UMC || bank_type == SMCA_UMC_QUIRK) && xec == 0) {
912938
channel = find_umc_channel(e);
913939
csrow = e->synd & 0x7; /* Bit 0, 1 ,2 */
914940
mce_snprintf(e->mc_location, "memory_channel=%d,csrow=%d",

0 commit comments

Comments
 (0)