Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion app/debug_stream_overlay.conf
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ CONFIG_SOF_DEBUG_STREAM_TEXT_MSG=y
CONFIG_SOF_DEBUG_STREAM_THREAD_INFO=y
# Zephyr option for storing human readable thread names
CONFIG_THREAD_NAME=y
# For Zephyr to compile with thread names on PTL we need to increase THREAD_BYTES
CONFIG_MAX_THREAD_BYTES=4

# Debug window slot configuration 1
# The CONFIG_SOF_TELEMETRY uses slot 2, but with performance and IO-performance
Expand All @@ -14,9 +16,15 @@ CONFIG_MEMORY_WIN_2_SIZE=16384
CONFIG_SOF_TELEMETRY_PERFORMANCE_MEASUREMENTS=n
CONFIG_SOF_TELEMETRY_IO_PERFORMANCE_MEASUREMENTS=n

# If we turn telemetry off all together, we can use slot 2. Slot 1 is used by mtrace
# If we turn telemetry off altogether, we can use slot 2. Slot 1 is used by mtrace
#CONFIG_SOF_DEBUG_STREAM_SLOT_NUMBER=2
#CONFIG_SOF_TELEMETRY=n
#CONFIG_SOF_TELEMETRY_PERFORMANCE_MEASUREMENTS=n
#CONFIG_SOF_TELEMETRY_IO_PERFORMANCE_MEASUREMENTS=n

# Enable Zephyr exception printing hook; debug stream is sensitive to this option too
CONFIG_EXCEPTION_DUMP_HOOK=y
# Do not try to send exception prints through logs; this causes problems on PTL with mtrace
CONFIG_EXCEPTION_DUMP_HOOK_ONLY=y
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm, this is ok for this overlay, but needs attention when/if we try to make this the default. The CI systems we have in place do not have debugstream runnign on DUTs, only mtrace, so this would in effect make dumps unavailable in CI.

# Print also backtrace through the exception hook
CONFIG_XTENSA_BACKTRACE_EXCEPTION_DUMP_HOOK=y
16 changes: 5 additions & 11 deletions src/debug/debug_stream/debug_stream_slot.c
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,12 @@
#include <rtos/string.h>
#include <user/debug_stream.h>
#include <user/debug_stream_slot.h>
#include <zephyr/kernel.h>

LOG_MODULE_REGISTER(debug_stream_slot);

struct cpu_mutex {
struct k_mutex m;
struct k_spinlock l;
} __aligned(CONFIG_DCACHE_LINE_SIZE);

/* CPU specific mutexes for each circular buffer */
Expand Down Expand Up @@ -66,6 +67,7 @@ int debug_stream_slot_send_record(struct debug_stream_record *rec)
debug_stream_get_circular_buffer(&desc, arch_proc_id());
uint32_t record_size = rec->size_words;
uint32_t record_start, buf_remain;
k_spinlock_key_t key;

LOG_DBG("Sending record %u id %u len %u", rec->seqno, rec->id, rec->size_words);

Expand All @@ -77,7 +79,7 @@ int debug_stream_slot_send_record(struct debug_stream_record *rec)
desc.buf_words, desc.core_id, desc.buf_words, desc.offset);
return -ENOMEM;
}
k_mutex_lock(&cpu_mutex[arch_proc_id()].m, K_FOREVER);
key = k_spin_lock(&cpu_mutex[arch_proc_id()].l);

rec->seqno = buf->next_seqno++;
rec->size_words = record_size + 1; /* +1 for size at the end of record */
Expand Down Expand Up @@ -105,7 +107,7 @@ int debug_stream_slot_send_record(struct debug_stream_record *rec)
buf->data[buf->w_ptr] = record_size + 1;
buf->w_ptr = (buf->w_ptr + 1) % desc.buf_words;

k_mutex_unlock(&cpu_mutex[arch_proc_id()].m);
k_spin_unlock(&cpu_mutex[arch_proc_id()].l, key);

LOG_DBG("Record %u id %u len %u sent", rec->seqno, rec->id, record_size);
return 0;
Expand Down Expand Up @@ -159,14 +161,6 @@ static int debug_stream_slot_init(void)

buf->next_seqno = 0;
buf->w_ptr = 0;
k_mutex_init(&cpu_mutex[i].m);
/* The core specific mutexes are now .bss which is uncached so the
* following line is commented out. However, since the mutexes are
* core specific there should be nothing preventing from having them
* in cached memory.
*
* sys_cache_data_flush_range(&cpu_mutex[i], sizeof(cpu_mutex[i]));
*/
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

k_spinlock_init()

Copy link
Contributor Author

@jsarha jsarha Feb 10, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hrm, I forgot that I already checked first time around that Zephyr spinlocks do not have such function or anything like it.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

SOF has one

static inline void k_spinlock_init(struct k_spinlock *lock)
- you need it if you re-initialise a spinlock, or in fact if your spinlock isn't zero-initialised at all

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's arguably quite confusing we use Zephyr native calls, but have a SOF extension. I'd only use k_spinlock_init if we really need to re-initialize.

}
LOG_INF("Debug stream slot initialized");

Expand Down
110 changes: 110 additions & 0 deletions src/debug/debug_stream/debug_stream_text_msg.c
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,13 @@
#include <soc.h>
#include <adsp_debug_window.h>
#include <sof/common.h>
#include <zephyr/logging/log.h>
#include <zephyr/arch/exception.h>

#include <user/debug_stream_text_msg.h>

LOG_MODULE_REGISTER(debug_stream_text_msg);

void ds_msg(const char *format, ...)
{
va_list args;
Expand All @@ -33,3 +37,109 @@ void ds_msg(const char *format, ...)
sizeof(buf.msg.hdr.data[0]));
debug_stream_slot_send_record(&buf.msg.hdr);
}

#if defined(CONFIG_EXCEPTION_DUMP_HOOK)
/* The debug stream debug window slot is 4k, and when it is split
* between the cores and the header/other overhead is removed, with 5
* cores the size is 768 bytes. The dump record must be smaller than
* that to get through to the host side.
*
* Also, because of the limited circular buffer size, we should only
* send one exception record. Exceptions often happen in bursts in a
* SOF system, and sending more than one record makes the host-side
* decoder lose track of things.
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

exceptions in bursts? We're talking about fatal exceptions here, which - well - should be fatal? Do you mean recursive (double etc.) exceptions or exceptions on multiple cores?

Copy link
Contributor Author

@jsarha jsarha Mar 5, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am not exactly sure what goes on in the system, but the when I cause a fatal exception with assert(0), the xtensa_excint1_c() is triggered multiple (tens of) times. The practical solution here is to print only one and filter out the ones that do not have proper data.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@jsarha wow, maybe we should understand those multiple exceptions. Could you check their causes? Perhaps addresses too? And if they're double exceptions? Maybe that's exactly what is breaking our exception debugging and by understanding them we can fix it? At least would be good to check the second exception. And then the third. To see from which point they start to recurse,

*/
static struct {
struct debug_stream_text_msg msg;
char text[640];
} __packed ds_buf[CONFIG_MP_MAX_NUM_CPUS];
static int reports_sent_cpu[CONFIG_MP_MAX_NUM_CPUS];
static size_t ds_pos[CONFIG_MP_MAX_NUM_CPUS];

static void ds_exception_drain(bool flush)
{
unsigned int cpu = arch_proc_id();

if (cpu >= CONFIG_MP_MAX_NUM_CPUS)
return;
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think this can happen. In worst case we would crash, so not sure if this check adds value.


if (flush) {
ds_pos[cpu] = 0;
reports_sent_cpu[cpu] = 0;
return;
}

if (ds_pos[cpu] == 0)
return;

if (reports_sent_cpu[cpu] > 0)
return;

ds_buf[cpu].msg.hdr.id = DEBUG_STREAM_RECORD_ID_TEXT_MSG;
ds_buf[cpu].msg.hdr.size_words =
SOF_DIV_ROUND_UP(sizeof(ds_buf[cpu].msg) + ds_pos[cpu],
sizeof(ds_buf[cpu].msg.hdr.data[0]));
/* Make sure the possible up to 3 extra bytes at end of msg are '\0' */
memset(ds_buf[cpu].text + ds_pos[cpu], 0,
ds_buf[cpu].msg.hdr.size_words * sizeof(ds_buf[cpu].msg.hdr.data[0]) - ds_pos[cpu]);
debug_stream_slot_send_record(&ds_buf[cpu].msg.hdr);
reports_sent_cpu[cpu] = 1;
ds_pos[cpu] = 0;
}

static void ds_exception_dump(const char *format, va_list args)
{
ssize_t len;
size_t avail;
size_t written;
unsigned int cpu = arch_proc_id();

if (cpu >= CONFIG_MP_MAX_NUM_CPUS)
return;
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ditto here.


if (reports_sent_cpu[cpu] > 0)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could we instead just limit based on availability? I.e. if no space, stop sending reports?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The problem is that we do not know where the host side read pointer is. From DSP side the host side python script is terribly slow, so slow that it practically stands still. So in practice we can only write less than the circular buffer size to avoid host side from loosing track of things.

The circular bufffers size on PTL, with 5 cores, is 768 bytes. The current max record size, 640 bytes, is selected to be a bit less that that. The 640 bytes can host a normal exception dump, but if the back trace is present it usually has to be truncated. So there really is room for only one exception dump record (per cpu), so there is no point in counting the bytes.

Of course we could busy wait sufficiently long time for the host to decode the previous record and then send another, but that is an advanced feature that can be added later if there is need for it.

return;

avail = sizeof(ds_buf[cpu].text) - ds_pos[cpu];
if (avail == 0) {
ds_exception_drain(false);
return;
}

if (format[0] == '\0')
return;

/* Skip useless " ** " prefix to save bytes */
if (strlen(format) >= 4 &&
format[0] == ' ' && format[1] == '*' && format[2] == '*' && format[3] == ' ')
format += 4;

len = vsnprintf(ds_buf[cpu].text + ds_pos[cpu], avail, format, args);
if (len < 0) {
ds_pos[cpu] = 0;
return;
}

if (len == 0)
return;

if ((size_t)len >= avail)
written = avail - 1;
else
written = (size_t)len;

ds_pos[cpu] += written;

if (ds_pos[cpu] >= sizeof(ds_buf[cpu].text))
ds_exception_drain(false);
}

static int init_exception_dump_hook(void)
{
set_exception_dump_hook(ds_exception_dump, ds_exception_drain);
LOG_INF("exception_dump_hook set");
return 0;
}

SYS_INIT(init_exception_dump_hook, APPLICATION, CONFIG_KERNEL_INIT_PRIORITY_DEFAULT);
#endif
5 changes: 3 additions & 2 deletions tools/debug_stream/debug_stream.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,8 +162,9 @@ def print_text_msg(self, record, cpu):
buffer = (
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

typo in commit: s/star/start/

ctypes.c_ubyte * (len(record) - ctypes.sizeof(TextMsg))
).from_address(ctypes.addressof(record) + ctypes.sizeof(TextMsg))
msg = bytearray(buffer).decode("utf-8")
print("CPU %u: %s" % (cpu, msg))
payload = bytes(buffer)
msg = payload.split(b"\0", 1)[0].decode("utf-8", errors="replace")
print("CPU %u:\n%s" % (cpu, msg))
return True

class DebugStreamSectionDescriptor(ctypes.Structure):
Expand Down
Loading