thesofproject · jsarha · Feb 2, 2026 · Feb 2, 2026 · Feb 10, 2026 · Mar 3, 2026
@@ -6,6 +6,8 @@ CONFIG_SOF_DEBUG_STREAM_TEXT_MSG=y
 CONFIG_SOF_DEBUG_STREAM_THREAD_INFO=y
 # Zephyr option for storing human readable thread names
 CONFIG_THREAD_NAME=y
+# For Zephyr to compile with thread names on PTL we need to increase THREAD_BYTES
+CONFIG_MAX_THREAD_BYTES=4
 
 # Debug window slot configuration 1
 # The CONFIG_SOF_TELEMETRY uses slot 2, but with performance and IO-performance
@@ -14,9 +16,15 @@ CONFIG_MEMORY_WIN_2_SIZE=16384
 CONFIG_SOF_TELEMETRY_PERFORMANCE_MEASUREMENTS=n
 CONFIG_SOF_TELEMETRY_IO_PERFORMANCE_MEASUREMENTS=n
 
-# If we turn telemetry off all together, we can use slot 2. Slot 1 is used by mtrace
+# If we turn telemetry off altogether, we can use slot 2. Slot 1 is used by mtrace
 #CONFIG_SOF_DEBUG_STREAM_SLOT_NUMBER=2
 #CONFIG_SOF_TELEMETRY=n
 #CONFIG_SOF_TELEMETRY_PERFORMANCE_MEASUREMENTS=n
 #CONFIG_SOF_TELEMETRY_IO_PERFORMANCE_MEASUREMENTS=n
 
+# Enable Zephyr exception printing hook; debug stream is sensitive to this option too
+CONFIG_EXCEPTION_DUMP_HOOK=y
+# Do not try to send exception prints through logs; this causes problems on PTL with mtrace
+CONFIG_EXCEPTION_DUMP_HOOK_ONLY=y
+# Print also backtrace through the exception hook
+CONFIG_XTENSA_BACKTRACE_EXCEPTION_DUMP_HOOK=y
@@ -10,11 +10,12 @@
 #include <rtos/string.h>
 #include <user/debug_stream.h>
 #include <user/debug_stream_slot.h>
+#include <zephyr/kernel.h>
 
 LOG_MODULE_REGISTER(debug_stream_slot);
 
 struct cpu_mutex {
-	struct k_mutex m;
+	struct k_spinlock l;
 } __aligned(CONFIG_DCACHE_LINE_SIZE);
 
 /* CPU specific mutexes for each circular buffer */
@@ -66,6 +67,7 @@ int debug_stream_slot_send_record(struct debug_stream_record *rec)
 		debug_stream_get_circular_buffer(&desc, arch_proc_id());
 	uint32_t record_size = rec->size_words;
 	uint32_t record_start, buf_remain;
+	k_spinlock_key_t key;
 
 	LOG_DBG("Sending record %u id %u len %u", rec->seqno, rec->id, rec->size_words);
 
@@ -77,7 +79,7 @@ int debug_stream_slot_send_record(struct debug_stream_record *rec)
 			desc.buf_words, desc.core_id, desc.buf_words, desc.offset);
 		return -ENOMEM;
 	}
-	k_mutex_lock(&cpu_mutex[arch_proc_id()].m, K_FOREVER);
+	key = k_spin_lock(&cpu_mutex[arch_proc_id()].l);
 
 	rec->seqno = buf->next_seqno++;
 	rec->size_words = record_size + 1; /* +1 for size at the end of record */
@@ -105,7 +107,7 @@ int debug_stream_slot_send_record(struct debug_stream_record *rec)
 	buf->data[buf->w_ptr] = record_size + 1;
 	buf->w_ptr = (buf->w_ptr + 1) % desc.buf_words;
 
-	k_mutex_unlock(&cpu_mutex[arch_proc_id()].m);
+	k_spin_unlock(&cpu_mutex[arch_proc_id()].l, key);
 
 	LOG_DBG("Record %u id %u len %u sent", rec->seqno, rec->id, record_size);
 	return 0;
@@ -159,14 +161,6 @@ static int debug_stream_slot_init(void)
 
 		buf->next_seqno = 0;
 		buf->w_ptr = 0;
-		k_mutex_init(&cpu_mutex[i].m);
-		/* The core specific mutexes are now .bss which is uncached so the
-		 * following line is commented out. However, since the mutexes are
-		 * core specific there should be nothing preventing from having them
-		 * in cached memory.
-		 *
-		 * sys_cache_data_flush_range(&cpu_mutex[i], sizeof(cpu_mutex[i]));
-		 */
 static inline void k_spinlock_init(struct k_spinlock *lock) 
 static inline void k_spinlock_init(struct k_spinlock *lock) 
 	}
 	LOG_INF("Debug stream slot initialized");
 

@@ -8,9 +8,13 @@
 #include <soc.h>
 #include <adsp_debug_window.h>
 #include <sof/common.h>
+#include <zephyr/logging/log.h>
+#include <zephyr/arch/exception.h>
 
 #include <user/debug_stream_text_msg.h>
 
+LOG_MODULE_REGISTER(debug_stream_text_msg);
+
 void ds_msg(const char *format, ...)
 {
 	va_list args;
@@ -33,3 +37,109 @@ void ds_msg(const char *format, ...)
 						  sizeof(buf.msg.hdr.data[0]));
 	debug_stream_slot_send_record(&buf.msg.hdr);
 }
+
+#if defined(CONFIG_EXCEPTION_DUMP_HOOK)
+/* The debug stream debug window slot is 4k, and when it is split
+ * between the cores and the header/other overhead is removed, with 5
+ * cores the size is 768 bytes. The dump record must be smaller than
+ * that to get through to the host side.
+ *
+ * Also, because of the limited circular buffer size, we should only
+ * send one exception record. Exceptions often happen in bursts in a
+ * SOF system, and sending more than one record makes the host-side
+ * decoder lose track of things.
+ */
+static struct {
+	struct debug_stream_text_msg msg;
+	char text[640];
+} __packed ds_buf[CONFIG_MP_MAX_NUM_CPUS];
+static int reports_sent_cpu[CONFIG_MP_MAX_NUM_CPUS];
+static size_t ds_pos[CONFIG_MP_MAX_NUM_CPUS];
+
+static void ds_exception_drain(bool flush)
+{
+	unsigned int cpu = arch_proc_id();
+
+	if (cpu >= CONFIG_MP_MAX_NUM_CPUS)
+		return;
+
+	if (flush) {
+		ds_pos[cpu] = 0;
+		reports_sent_cpu[cpu] = 0;
+		return;
+	}
+
+	if (ds_pos[cpu] == 0)
+		return;
+
+	if (reports_sent_cpu[cpu] > 0)
+		return;
+
+	ds_buf[cpu].msg.hdr.id = DEBUG_STREAM_RECORD_ID_TEXT_MSG;
+	ds_buf[cpu].msg.hdr.size_words =
+		SOF_DIV_ROUND_UP(sizeof(ds_buf[cpu].msg) + ds_pos[cpu],
+				 sizeof(ds_buf[cpu].msg.hdr.data[0]));
+	/* Make sure the possible up to 3 extra bytes at end of msg are '\0' */
+	memset(ds_buf[cpu].text + ds_pos[cpu], 0,
+	       ds_buf[cpu].msg.hdr.size_words * sizeof(ds_buf[cpu].msg.hdr.data[0]) - ds_pos[cpu]);
+	debug_stream_slot_send_record(&ds_buf[cpu].msg.hdr);
+	reports_sent_cpu[cpu] = 1;
+	ds_pos[cpu] = 0;
+}
+
+static void ds_exception_dump(const char *format, va_list args)
+{
+	ssize_t len;
+	size_t avail;
+	size_t written;
+	unsigned int cpu = arch_proc_id();
+
+	if (cpu >= CONFIG_MP_MAX_NUM_CPUS)
+		return;
+
+	if (reports_sent_cpu[cpu] > 0)
+		return;
+
+	avail = sizeof(ds_buf[cpu].text) - ds_pos[cpu];
+	if (avail == 0) {
+		ds_exception_drain(false);
+		return;
+	}
+
+	if (format[0] == '\0')
+		return;
+
+	/* Skip useless " ** " prefix to save bytes */
+	if (strlen(format) >= 4 &&
+	    format[0] == ' ' && format[1] == '*' && format[2] == '*' && format[3] == ' ')
+		format += 4;
+
+	len = vsnprintf(ds_buf[cpu].text + ds_pos[cpu], avail, format, args);
+	if (len < 0) {
+		ds_pos[cpu] = 0;
+		return;
+	}
+
+	if (len == 0)
+		return;
+
+	if ((size_t)len >= avail)
+		written = avail - 1;
+	else
+		written = (size_t)len;
+
+	ds_pos[cpu] += written;
+
+	if (ds_pos[cpu] >= sizeof(ds_buf[cpu].text))
+		ds_exception_drain(false);
+}
+
+static int init_exception_dump_hook(void)
+{
+	set_exception_dump_hook(ds_exception_dump, ds_exception_drain);
+	LOG_INF("exception_dump_hook set");
+	return 0;
+}
+
+SYS_INIT(init_exception_dump_hook, APPLICATION, CONFIG_KERNEL_INIT_PRIORITY_DEFAULT);
+#endif
@@ -162,8 +162,9 @@ def print_text_msg(self, record, cpu):
         buffer = (
             ctypes.c_ubyte * (len(record) - ctypes.sizeof(TextMsg))
         ).from_address(ctypes.addressof(record) + ctypes.sizeof(TextMsg))
-        msg = bytearray(buffer).decode("utf-8")
-        print("CPU %u: %s" % (cpu, msg))
+        payload = bytes(buffer)
+        msg = payload.split(b"\0", 1)[0].decode("utf-8", errors="replace")
+        print("CPU %u:\n%s" % (cpu, msg))
         return True
 
 class DebugStreamSectionDescriptor(ctypes.Structure):