Skip to content

Commit

Permalink
rasdaemon: Add support for vendor-specific machine check error information
Browse files Browse the repository at this point in the history

Some CPU vendors may provide additional vendor-specific machine check
error information. AMD, for example, provides FRU Text through the SYND 1/2
registers if bit 9 of the SMCA_CONFIG register is set.

Add support to display the additional vendor-specific error information,
if any.

Signed-off-by: Avadhut Naik <[email protected]>
  • Loading branch information
AvaNaik authored and Avadhut Naik committed Nov 21, 2023
1 parent cfabd93 commit 926c2b3
Show file tree
Hide file tree
Showing 3 changed files with 36 additions and 0 deletions.
12 changes: 12 additions & 0 deletions mce-amd-smca.c
Original file line number Diff line number Diff line change
Expand Up @@ -965,6 +965,18 @@ void decode_smca_error(struct mce_event *e, struct mce_priv *m)
channel, csrow);
}


if (e->vdata_len) {
uint64_t smca_config = e->vdata[2];

/*
* BIT 9 of the CONFIG register of a few SMCA Bank types indicates
* presence of FRU Text in SYND 1 / 2 registers
*/
if (smca_config & BIT(9))
memcpy(e->frutext, e->vdata, 16);
}

}

int parse_amd_smca_event(struct ras_events *ras, struct mce_event *e)
Expand Down
21 changes: 21 additions & 0 deletions ras-mce-handler.c
Original file line number Diff line number Diff line change
Expand Up @@ -372,6 +372,24 @@ static void report_mce_event(struct ras_events *ras,

trace_seq_printf(s, ", apicid= %x", e->apicid);

if (!e->vdata_len)
return;

if (strlen(e->frutext)) {
trace_seq_printf(s, ", FRU Text= %s", e->frutext);
trace_seq_printf(s, ", Vendor Data= ");
for (int i = 2; i < e->vdata_len/8; i++) {
trace_seq_printf(s, "0x%lx", e->vdata[i]);
trace_seq_printf(s, " ");
}
} else {
trace_seq_printf(s, ", Vendor Data= ");
for (int i = 0; i < e->vdata_len/8; i ++) {
trace_seq_printf(s, "0x%lx", e->vdata[i]);
trace_seq_printf(s, " ");
}
}

/*
* FIXME: The original mcelog userspace tool uses DMI to map from
* address to DIMM. From the comments there, the code there doesn't
Expand Down Expand Up @@ -548,6 +566,9 @@ int ras_mce_event_handler(struct trace_seq *s,
return -1;
e.ipid = val;

/* Get Vendor-specific Data, if any */
e.vdata = tep_get_field_raw(s, event, "v_data", record, &e.vdata_len, 1);

switch (mce->cputype) {
case CPU_GENERIC:
break;
Expand Down
3 changes: 3 additions & 0 deletions ras-mce-handler.h
Original file line number Diff line number Diff line change
Expand Up @@ -75,8 +75,11 @@ struct mce_event {
uint8_t cpuvendor;
uint64_t synd; /* MCA_SYND MSR: only valid on SMCA systems */
uint64_t ipid; /* MCA_IPID MSR: only valid on SMCA systems */
int32_t vdata_len;
const uint64_t *vdata;

/* Parsed data */
char frutext[17];
char timestamp[64];
char bank_name[64];
char error_msg[4096];
Expand Down

0 comments on commit 926c2b3

Please sign in to comment.