diff mbox series

[6/6] decode-dimms: Decode DDR5 error log

Message ID 20241114-decode-ddr5-v1-6-0ed2db8ef30f@outlook.com.au
State New
Headers show
Series decode-dimms: Implement DDR5 decoding | expand

Commit Message

Stephen Horvath via B4 Relay Nov. 14, 2024, 6:37 a.m. UTC
From: Stephen Horvath <s.horvath@outlook.com.au>

JESD400 specifies that an error log can be written anywhere in the
end-user-programmable EEPROM section, following a specific format. This
adds code to find and read that error log. Note that this is completely
untested on actual hardware implementations; it has only been tested by
reading some manually constructed files.

Signed-off-by: Stephen Horvath <s.horvath@outlook.com.au>
---
 eeprom/decode-dimms | 105 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 105 insertions(+)
diff mbox series

Patch

diff --git a/eeprom/decode-dimms b/eeprom/decode-dimms
index a3b613bc869bbd1d8183958c42d05c3b3e3653ca..a6a16696b126b16b7a8e62b15120f99789d9b762 100755
--- a/eeprom/decode-dimms
+++ b/eeprom/decode-dimms
@@ -2656,6 +2656,108 @@  sub decode_ddr5_mfg_data($)
 	}
 }
 
+# Find and print every DDR5 error log record stored in the SPD.
+# Parameter: EEPROM bytes 0-1023 (using 640-1023)
+sub decode_ddr5_error_data($)
+{
+	my $bytes = shift;
+
+	# Zero or more error logs may appear anywhere in any End User Programmable blocks of the SPD,
+	# including over SPD Block boundaries. They may be found by searching for a four byte anchor string.
+	# Each record is read as 24 bytes, anchor included.
+
+	my $errors = [];
+
+	# Bytes 0-1023 are 1024 bytes; capping at 1023 would miss a log ending in the last byte
+	my $size = scalar @{$bytes} < 1024 ? scalar @{$bytes} : 1024;
+
+	for (my $ii = 0; $ii < $size - 640 - 23; $ii++) {
+		# Anchor is the byte values 95 70 76 95 ("_FL_" in ASCII), joined as decimal strings
+		if (join('', @{$bytes}[640 + $ii .. 640 + $ii + 3]) eq "95707695") {
+			push @{$errors}, [@{$bytes}[640 + $ii .. 640 + $ii + 23]];
+			$ii += 23;	# with the loop increment, resume right after this record
+		}
+	}
+
+	# No error logs found: print nothing at all
+	return if @{$errors} == 0;
+
+	prints("Error Log");
+	printl("Error Log Count", scalar @{$errors});
+
+	for (my $ii = 0; $ii < scalar @{$errors}; $ii++) {
+		my $error = $errors->[$ii];
+
+		# error type flags (byte 4), one line per flag that is set
+		printl_cond($error->[4] & (1 << 0), "Error $ii Type", "DRAM Uncorrectable Error");
+		printl_cond($error->[4] & (1 << 1), "Error $ii Type", "DRAM Correctable Error");
+		printl_cond($error->[4] & (1 << 2), "Error $ii Type", "DRAM ECS Error");
+		printl_cond($error->[4] & (1 << 3), "Error $ii Type", "hPPR Was Required");
+		printl_cond($error->[4] & (1 << 4), "Error $ii Type", "hPPR Resource Error");
+
+		# error location (bytes 5-6)
+		printl("Error $ii Location CPU", ($error->[5] >> 3) & 0x07);
+		printl("Error $ii Location CPUMC", (($error->[5] & 3) << 2) | ($error->[6] >> 6));
+		printl("Error $ii Location DIMM", ($error->[6] >> 4) & 0x01);
+
+		# these are active low
+		printl_cond(~$error->[6] & (1 << 3), "Error $ii Location Rank", "0 (sub-channel A)");
+		printl_cond(~$error->[6] & (1 << 2), "Error $ii Location Rank", "1 (sub-channel A)");
+		printl_cond(~$error->[6] & (1 << 1), "Error $ii Location Rank", "0 (sub-channel B)");
+		printl_cond(~$error->[6] & (1 << 0), "Error $ii Location Rank", "1 (sub-channel B)");
+
+		printl("Error $ii Location Parity", ($error->[7] >> 6) & 0x01);
+		if (($error->[7] >> 5) & 1) {
+			# chip identifier?
+			printl("Error $ii Location Chip", ($error->[7] >> 2) & 0x07);
+		} else {
+			# row address?
+			printl("Error $ii Location Bank Group", (($error->[7] & 0x03) << 1) | (($error->[8] & 0x80) >> 7));
+			printl("Error $ii Location Bank Address", ($error->[8] >> 5) & 0x03);
+			printl("Error $ii Location Row Address", (($error->[8] & 0x1f) << 12) | ($error->[9] << 4) | ($error->[10] >> 4));
+			printl("Error $ii Location Column Address", (($error->[10] & 0x0f) << 7) | (($error->[11] & 0xf0) >> 1));
+		}
+
+		# also active low; the device mask spans the low nibble of byte 11 and bytes 12-13
+		printl_cond(~$error->[11] & (1 << 0), "Error $ii Location Device", "DQS6A");
+		printl_cond(~$error->[11] & (1 << 1), "Error $ii Location Device", "DQS7A");
+		printl_cond(~$error->[11] & (1 << 2), "Error $ii Location Device", "DQS8A");
+		printl_cond(~$error->[11] & (1 << 3), "Error $ii Location Device", "DQS9A");
+
+		printl_cond(~$error->[12] & (1 << 0), "Error $ii Location Device", "DQS8B");
+		printl_cond(~$error->[12] & (1 << 1), "Error $ii Location Device", "DQS9B");
+		printl_cond(~$error->[12] & (1 << 2), "Error $ii Location Device", "DQS0A");
+		printl_cond(~$error->[12] & (1 << 3), "Error $ii Location Device", "DQS1A");
+		printl_cond(~$error->[12] & (1 << 4), "Error $ii Location Device", "DQS2A");
+		printl_cond(~$error->[12] & (1 << 5), "Error $ii Location Device", "DQS3A");
+		printl_cond(~$error->[12] & (1 << 6), "Error $ii Location Device", "DQS4A");
+		printl_cond(~$error->[12] & (1 << 7), "Error $ii Location Device", "DQS5A");
+
+		# DQS0B-DQS7B are taken from byte 13; byte 12 is already fully used above
+		printl_cond(~$error->[13] & (1 << 0), "Error $ii Location Device", "DQS0B");
+		printl_cond(~$error->[13] & (1 << 1), "Error $ii Location Device", "DQS1B");
+		printl_cond(~$error->[13] & (1 << 2), "Error $ii Location Device", "DQS2B");
+		printl_cond(~$error->[13] & (1 << 3), "Error $ii Location Device", "DQS3B");
+		printl_cond(~$error->[13] & (1 << 4), "Error $ii Location Device", "DQS4B");
+		printl_cond(~$error->[13] & (1 << 5), "Error $ii Location Device", "DQS5B");
+		printl_cond(~$error->[13] & (1 << 6), "Error $ii Location Device", "DQS6B");
+		printl_cond(~$error->[13] & (1 << 7), "Error $ii Location Device", "DQS7B");
+
+		# timestamp, bit-packed across bytes 14-17; the year is stored as an offset from 2020
+		my $year = ($error->[14] >> 2) + 2020;
+		my $month = (($error->[14] & 0x03) << 2) | ($error->[15] >> 6);
+		my $day = ($error->[15] & 0x3e) >> 1;
+		my $hour = (($error->[15] & 0x01) << 4) | ($error->[16] >> 4);
+		my $minute = (($error->[16] & 0x0f) << 2) | ($error->[17] >> 6);
+		my $second = $error->[17] & 0x3f;
+		printl("Error $ii Timestamp", sprintf("%04d-%02d-%02d %02d:%02d:%02d",
+						      $year, $month, $day, $hour, $minute, $second));
+
+		# TODO: DRAM refresh settings
+		# TODO: measured temperature
+	}
+}
+
 # Parameter: EEPROM bytes 0-127 (using 64-98)
 sub decode_manufacturing_information($)
 {
@@ -3215,6 +3317,9 @@  for $current (0 .. $#dimm) {
 			# Decode DDR5-specific manufacturing data in bytes
 			# 512-639
 			decode_ddr5_mfg_data(\@bytes);
+			# Decode DDR5-specific error log
+			# 640-1023 (max)
+			decode_ddr5_error_data(\@bytes);
 		}
 	} else {
 		# Decode next 35 bytes (64-98, common to most