From b33395a96a7c2152b7f10a6e0021fde433edf57b Mon Sep 17 00:00:00 2001 From: Bartek Tofel Date: Thu, 9 Apr 2026 13:19:16 +0200 Subject: [PATCH 1/9] dump inuse and memory profile --- framework/.changeset/v0.15.16.md | 1 + framework/leak/detector_cl_node.go | 26 +++++++++++++++++++------- 2 files changed, 20 insertions(+), 7 deletions(-) create mode 100644 framework/.changeset/v0.15.16.md diff --git a/framework/.changeset/v0.15.16.md b/framework/.changeset/v0.15.16.md new file mode 100644 index 000000000..a93c4d80f --- /dev/null +++ b/framework/.changeset/v0.15.16.md @@ -0,0 +1 @@ +- Dump two memory profiles: inuse and alloc diff --git a/framework/leak/detector_cl_node.go b/framework/leak/detector_cl_node.go index 423ca3ef3..3c9b4a45d 100644 --- a/framework/leak/detector_cl_node.go +++ b/framework/leak/detector_cl_node.go @@ -245,13 +245,25 @@ func (cd *CLNodesLeakDetector) Check(t *CLNodesCheck) error { Msg("Leaks info") framework.L.Info().Msg("Downloading pprof profile..") dumper := NewProfileDumper(framework.LocalPyroscopeBaseURL) - profilePath, err := dumper.MemoryProfile(&ProfileDumperConfig{ - ServiceName: "chainlink-node", - }) - if err != nil { - errs = append(errs, fmt.Errorf("failed to download Pyroscopt profile: %w", err)) - return errors.Join(errs...) + + profilesToDump := []string{DefaultProfileType, "memory:inuse_space:bytes:space:bytes"} + for _, profileType := range profilesToDump { + profileSplit := strings.Split(profileType, ":") + outputPath := DefaultOutputPath + if len(profileSplit) > 1 { + // e.g. for "memory:inuse_space:bytes:space:bytes" we want to have output file "memory-inuse_space.pprof" + outputPath = fmt.Sprintf("%s-%s.pprof", profileSplit[0], profileSplit[1]) + } + profilePath, err := dumper.MemoryProfile(&ProfileDumperConfig{ + ServiceName: "chainlink-node", + ProfileType: profileType, + OutputPath: outputPath, + }) + if err != nil { + errs = append(errs, fmt.Errorf("failed to download Pyroscope profile %s: %w", profileType, err)) + return errors.Join(errs...) + } + framework.L.Info().Str("Path", profilePath).Str("ProfileType", profileType).Msg("Saved pprof profile") } - framework.L.Info().Str("Path", profilePath).Msg("Saved pprof profile") return errors.Join(errs...) } From cf09c897700e947669ea7b58829091371e945c23 Mon Sep 17 00:00:00 2001 From: Bartek Tofel Date: Thu, 9 Apr 2026 17:48:53 +0200 Subject: [PATCH 2/9] add chainlink admin profile dump --- framework/.changeset/v0.15.16.md | 1 + framework/docker.go | 125 ++++++++++++++++++++++++++ framework/leak/detector_cl_node.go | 14 ++- framework/leak/node_dumper.go | 140 +++++++++++++++++++++++++++++ 4 files changed, 278 insertions(+), 2 deletions(-) create mode 100644 framework/leak/node_dumper.go diff --git a/framework/.changeset/v0.15.16.md b/framework/.changeset/v0.15.16.md index a93c4d80f..e23a9f230 100644 --- a/framework/.changeset/v0.15.16.md +++ b/framework/.changeset/v0.15.16.md @@ -1 +1,2 @@ - Dump two memory profiles: inuse and alloc +- Dump LOOPs profiles via admin command diff --git a/framework/docker.go b/framework/docker.go index 784b50d63..646da23b2 100644 --- a/framework/docker.go +++ b/framework/docker.go @@ -10,6 +10,7 @@ import ( "io" "os" "os/exec" + "path" "path/filepath" "regexp" "strings" @@ -186,6 +187,47 @@ func (dc *DockerClient) CopyFile(containerName, sourceFile, targetPath string) e return dc.copyToContainer(containerID, sourceFile, targetPath) } +// CopyFromContainer copies files from a container path and returns a tar archive stream. +func (dc *DockerClient) CopyFromContainer(containerName, sourcePath string) (io.ReadCloser, container.PathStat, error) { + return dc.CopyFromContainerWithContext(context.Background(), containerName, sourcePath) +} + +// CopyFromContainerWithContext copies files from a container path and returns a tar archive stream. +func (dc *DockerClient) CopyFromContainerWithContext(ctx context.Context, containerName, sourcePath string) (io.ReadCloser, container.PathStat, error) { + containerID, err := dc.findContainerIDByName(ctx, containerName) + if err != nil { + return nil, container.PathStat{}, fmt.Errorf("failed to find container ID by name: %s", containerName) + } + reader, stat, err := dc.cli.CopyFromContainer(ctx, containerID, sourcePath) + if err != nil { + return nil, container.PathStat{}, fmt.Errorf("could not copy from container %s path %s: %w", containerName, sourcePath, err) + } + return reader, stat, nil +} + +// CopyFromContainerToHost copies files from a container path and extracts them into hostDir. +// If sourcePath points to a directory, only its contents are placed inside hostDir. +func (dc *DockerClient) CopyFromContainerToHost(containerName, sourcePath, hostDir string) error { + return dc.CopyFromContainerToHostWithContext(context.Background(), containerName, sourcePath, hostDir) +} + +// CopyFromContainerToHostWithContext copies files from a container path and extracts them into hostDir. +// If sourcePath points to a directory, only its contents are placed inside hostDir. +func (dc *DockerClient) CopyFromContainerToHostWithContext(ctx context.Context, containerName, sourcePath, hostDir string) error { + reader, _, err := dc.CopyFromContainerWithContext(ctx, containerName, sourcePath) + if err != nil { + return err + } + defer reader.Close() + + if err := os.MkdirAll(hostDir, 0o755); err != nil { + return fmt.Errorf("failed to create host destination directory %s: %w", hostDir, err) + } + + stripTopDir := path.Base(path.Clean(sourcePath)) + return extractTarArchiveToHostDir(reader, hostDir, stripTopDir) +} + // findContainerIDByName finds a container ID by its name func (dc *DockerClient) findContainerIDByName(ctx context.Context, containerName string) (string, error) { containers, err := dc.cli.ContainerList(ctx, container.ListOptions{ @@ -245,6 +287,89 @@ func (dc *DockerClient) copyToContainer(containerID, sourceFile, targetPath stri return nil } +func extractTarArchiveToHostDir(reader io.Reader, hostDir, stripTopDir string) error { + tarReader := tar.NewReader(reader) + for { + header, err := tarReader.Next() + if err == io.EOF { + return nil + } + if err != nil { + return fmt.Errorf("failed reading tar stream: %w", err) + } + + relativePath, ok := normalizeTarEntryPath(header.Name, stripTopDir) + if !ok { + continue + } + targetPath := filepath.Join(hostDir, filepath.FromSlash(relativePath)) + if err := ensureSubpath(hostDir, targetPath); err != nil { + return err + } + + switch header.Typeflag { + case tar.TypeDir: + if err := os.MkdirAll(targetPath, 0o755); err != nil { + return fmt.Errorf("failed to create dir %s: %w", targetPath, err) + } + case tar.TypeReg, tar.TypeRegA: + if err := os.MkdirAll(filepath.Dir(targetPath), 0o755); err != nil { + return fmt.Errorf("failed to create parent dir for %s: %w", targetPath, err) + } + file, createErr := os.OpenFile(targetPath, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, os.FileMode(header.Mode)) + if createErr != nil { + return fmt.Errorf("failed to create file %s: %w", targetPath, createErr) + } + if _, copyErr := io.Copy(file, tarReader); copyErr != nil { + _ = file.Close() + return fmt.Errorf("failed to write file %s: %w", targetPath, copyErr) + } + if closeErr := file.Close(); closeErr != nil { + return fmt.Errorf("failed to close file %s: %w", targetPath, closeErr) + } + } + } +} + +func normalizeTarEntryPath(entryName, stripTopDir string) (string, bool) { + normalized := strings.TrimPrefix(entryName, "./") + normalized = path.Clean(normalized) + if normalized == "." || normalized == "/" { + return "", false + } + + if stripTopDir != "" { + stripTopDir = path.Clean(stripTopDir) + if normalized == stripTopDir { + return "", false + } + prefix := stripTopDir + "/" + normalized = strings.TrimPrefix(normalized, prefix) + } + + normalized = strings.TrimPrefix(normalized, "/") + if normalized == "" { + return "", false + } + return normalized, true +} + +func ensureSubpath(baseDir, targetPath string) error { + baseAbs, err := filepath.Abs(baseDir) + if err != nil { + return fmt.Errorf("failed to resolve absolute path for %s: %w", baseDir, err) + } + targetAbs, err := filepath.Abs(targetPath) + if err != nil { + return fmt.Errorf("failed to resolve absolute path for %s: %w", targetPath, err) + } + prefix := baseAbs + string(filepath.Separator) + if targetAbs != baseAbs && !strings.HasPrefix(targetAbs, prefix) { + return fmt.Errorf("unsafe path detected outside destination: %s", targetPath) + } + return nil +} + // SearchLogFile searches logfile using regex and return matches or error func SearchLogFile(fp string, regex string) ([]string, error) { file, err := os.Open(fp) diff --git a/framework/leak/detector_cl_node.go b/framework/leak/detector_cl_node.go index 3c9b4a45d..7c8df0414 100644 --- a/framework/leak/detector_cl_node.go +++ b/framework/leak/detector_cl_node.go @@ -1,6 +1,7 @@ package leak import ( + "context" "errors" "fmt" "strconv" @@ -243,10 +244,11 @@ func (cd *CLNodesLeakDetector) Check(t *CLNodesCheck) error { Str("TestDuration", t.End.Sub(t.Start).String()). Float64("TestDurationSec", t.End.Sub(t.Start).Seconds()). Msg("Leaks info") - framework.L.Info().Msg("Downloading pprof profile..") - dumper := NewProfileDumper(framework.LocalPyroscopeBaseURL) profilesToDump := []string{DefaultProfileType, "memory:inuse_space:bytes:space:bytes"} + framework.L.Info().Msgf("Downloading %d pprof profiles..", len(profilesToDump)) + dumper := NewProfileDumper(framework.LocalPyroscopeBaseURL) + for _, profileType := range profilesToDump { profileSplit := strings.Split(profileType, ":") outputPath := DefaultOutputPath @@ -265,5 +267,13 @@ func (cd *CLNodesLeakDetector) Check(t *CLNodesCheck) error { } framework.L.Info().Str("Path", profilePath).Str("ProfileType", profileType).Msg("Saved pprof profile") } + + ctx, cancel := context.WithTimeout(context.Background(), DefaultNodeProfileDumpTimeout) + defer cancel() + if err := DumpNodeProfiles(ctx, cd.nodesetName, DefaultAdminProfilesDir); err != nil { + framework.L.Error().Err(err).Msg("Failed to dump node profiles") + errs = append(errs, fmt.Errorf("failed to dump node profiles: %w", err)) + } + return errors.Join(errs...) } diff --git a/framework/leak/node_dumper.go b/framework/leak/node_dumper.go new file mode 100644 index 000000000..0d6a7a982 --- /dev/null +++ b/framework/leak/node_dumper.go @@ -0,0 +1,140 @@ +package leak + +import ( + "context" + "errors" + "fmt" + "os" + "path" + "path/filepath" + "regexp" + "strings" + "time" + + "github.com/docker/docker/api/types/container" + "github.com/docker/docker/client" + f "github.com/smartcontractkit/chainlink-testing-framework/framework" +) + +var containerNameSanitizer = regexp.MustCompile(`[^a-zA-Z0-9._-]`) + +const ( + DefaultAdminProfilesDir = "admin-profiles" + DefaultNodeProfileDumpTimeout = 5 * time.Minute +) + +// DumpNodeProfiles runs chainlink profile collection in each running container +// with a name containing namePattern and copies ./profiles content to dst/profile-. +func DumpNodeProfiles(ctx context.Context, namePattern, dst string) error { + f.L.Info(). + Str("NamePattern", namePattern). + Str("DestinationDir", dst). + Msg("Dumping node profiles by container name pattern") + + if strings.TrimSpace(namePattern) == "" { + return fmt.Errorf("container name pattern must not be empty") + } + if strings.TrimSpace(dst) == "" { + return fmt.Errorf("destination path must not be empty") + } + + if err := os.MkdirAll(dst, 0o755); err != nil { + return fmt.Errorf("failed to create destination directory %q: %w", dst, err) + } + + cli, err := client.NewClientWithOpts(client.FromEnv, client.WithAPIVersionNegotiation()) + if err != nil { + return fmt.Errorf("failed to create Docker client: %w", err) + } + defer cli.Close() + dc, err := f.NewDockerClient() + if err != nil { + return fmt.Errorf("failed to create framework docker client: %w", err) + } + + containers, err := runningContainers(ctx, cli) + if err != nil { + return err + } + + var errs []error + for _, c := range containers { + if !strings.Contains(c.name, namePattern) { + continue + } + + // Keep destination names safe and filesystem-friendly. + safeName := containerNameSanitizer.ReplaceAllString(c.name, "_") + targetDir := filepath.Join(dst, fmt.Sprintf("profile-%s", safeName)) + if err := os.MkdirAll(targetDir, 0o755); err != nil { + errs = append(errs, fmt.Errorf("failed to create destination directory for %s: %w", c.name, err)) + continue + } + + f.L.Info().Str("ContainerName", c.name).Msg("Collecting node profile") + + out, execErr := dc.ExecContainerWithContext( + ctx, + c.name, + []string{"chainlink", "admin", "profile", "-seconds", "1", "-output_dir", "./profiles"}, + ) + if execErr != nil { + errs = append(errs, fmt.Errorf("failed to execute profile command in container %s: %w, output: %s", c.name, execErr, strings.TrimSpace(out))) + continue + } + + profilesPath := path.Clean(path.Join(c.workingDir, "profiles")) + if copyErr := dc.CopyFromContainerToHostWithContext(ctx, c.name, profilesPath, targetDir); copyErr != nil { + errs = append(errs, fmt.Errorf("failed to copy profiles from container %s to %s: %w", c.name, targetDir, copyErr)) + continue + } + + f.L.Info().Str("ContainerName", c.name).Str("Destination", targetDir).Msg("Profiles copied") + } + + return errors.Join(errs...) +} + +type runningContainer struct { + name string + workingDir string +} + +func runningContainers(ctx context.Context, cli *client.Client) ([]runningContainer, error) { + containers, err := cli.ContainerList(ctx, container.ListOptions{}) + if err != nil { + return nil, fmt.Errorf("failed to list running Docker containers: %w", err) + } + + res := make([]runningContainer, 0, len(containers)) + for _, c := range containers { + name := firstContainerName(c.Names) + if name == "" { + continue + } + + inspect, inspectErr := cli.ContainerInspect(ctx, c.ID) + if inspectErr != nil { + return nil, fmt.Errorf("failed to inspect container %s: %w", name, inspectErr) + } + workingDir := "/" + if inspect.Config != nil && inspect.Config.WorkingDir != "" { + workingDir = inspect.Config.WorkingDir + } + res = append(res, runningContainer{ + name: name, + workingDir: workingDir, + }) + } + return res, nil +} + +func firstContainerName(names []string) string { + for _, n := range names { + if n == "" { + continue + } + return strings.TrimPrefix(n, "/") + } + return "" +} From 8b60da49d290321c0e672d1ccfb630d92aaa555e Mon Sep 17 00:00:00 2001 From: Bartek Tofel Date: Mon, 13 Apr 2026 16:38:10 +0200 Subject: [PATCH 3/9] fix lints --- framework/docker.go | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/framework/docker.go b/framework/docker.go index 646da23b2..a44d6a1a9 100644 --- a/framework/docker.go +++ b/framework/docker.go @@ -312,14 +312,15 @@ func extractTarArchiveToHostDir(reader io.Reader, hostDir, stripTopDir string) e if err := os.MkdirAll(targetPath, 0o755); err != nil { return fmt.Errorf("failed to create dir %s: %w", targetPath, err) } - case tar.TypeReg, tar.TypeRegA: + case tar.TypeReg: if err := os.MkdirAll(filepath.Dir(targetPath), 0o755); err != nil { return fmt.Errorf("failed to create parent dir for %s: %w", targetPath, err) } - file, createErr := os.OpenFile(targetPath, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, os.FileMode(header.Mode)) + file, createErr := os.OpenFile(targetPath, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, header.FileInfo().Mode().Perm()) if createErr != nil { return fmt.Errorf("failed to create file %s: %w", targetPath, createErr) } + //nolint:gosec // G110: source is Docker daemon tar stream from local test containers if _, copyErr := io.Copy(file, tarReader); copyErr != nil { _ = file.Close() return fmt.Errorf("failed to write file %s: %w", targetPath, copyErr) From 8691593868d2e710dd8d34ecb45246dddf0933f8 Mon Sep 17 00:00:00 2001 From: Bartek Tofel Date: Tue, 14 Apr 2026 09:21:39 +0200 Subject: [PATCH 4/9] do not extract tar archive copied from Docker --- framework/docker.go | 111 ++++------------------------- framework/leak/detector_cl_node.go | 2 +- framework/leak/node_dumper.go | 14 ++-- 3 files changed, 19 insertions(+), 108 deletions(-) diff --git a/framework/docker.go b/framework/docker.go index a44d6a1a9..37b63e122 100644 --- a/framework/docker.go +++ b/framework/docker.go @@ -10,7 +10,6 @@ import ( "io" "os" "os/exec" - "path" "path/filepath" "regexp" "strings" @@ -205,27 +204,27 @@ func (dc *DockerClient) CopyFromContainerWithContext(ctx context.Context, contai return reader, stat, nil } -// CopyFromContainerToHost copies files from a container path and extracts them into hostDir. -// If sourcePath points to a directory, only its contents are placed inside hostDir. -func (dc *DockerClient) CopyFromContainerToHost(containerName, sourcePath, hostDir string) error { - return dc.CopyFromContainerToHostWithContext(context.Background(), containerName, sourcePath, hostDir) -} - -// CopyFromContainerToHostWithContext copies files from a container path and extracts them into hostDir. -// If sourcePath points to a directory, only its contents are placed inside hostDir. -func (dc *DockerClient) CopyFromContainerToHostWithContext(ctx context.Context, containerName, sourcePath, hostDir string) error { +// CopyFromContainerToTarWithContext writes the Docker copy tar stream to targetTarPath. +func (dc *DockerClient) CopyFromContainerToTarWithContext(ctx context.Context, containerName, sourcePath, targetTarPath string) error { reader, _, err := dc.CopyFromContainerWithContext(ctx, containerName, sourcePath) if err != nil { return err } defer reader.Close() - if err := os.MkdirAll(hostDir, 0o755); err != nil { - return fmt.Errorf("failed to create host destination directory %s: %w", hostDir, err) + if err := os.MkdirAll(filepath.Dir(targetTarPath), 0o755); err != nil { + return fmt.Errorf("failed to create destination directory for %s: %w", targetTarPath, err) } + file, err := os.Create(targetTarPath) + if err != nil { + return fmt.Errorf("failed to create destination archive %s: %w", targetTarPath, err) + } + defer file.Close() - stripTopDir := path.Base(path.Clean(sourcePath)) - return extractTarArchiveToHostDir(reader, hostDir, stripTopDir) + if _, err := io.Copy(file, reader); err != nil { + return fmt.Errorf("failed to write archive %s: %w", targetTarPath, err) + } + return nil } // findContainerIDByName finds a container ID by its name @@ -287,90 +286,6 @@ func (dc *DockerClient) copyToContainer(containerID, sourceFile, targetPath stri return nil } -func extractTarArchiveToHostDir(reader io.Reader, hostDir, stripTopDir string) error { - tarReader := tar.NewReader(reader) - for { - header, err := tarReader.Next() - if err == io.EOF { - return nil - } - if err != nil { - return fmt.Errorf("failed reading tar stream: %w", err) - } - - relativePath, ok := normalizeTarEntryPath(header.Name, stripTopDir) - if !ok { - continue - } - targetPath := filepath.Join(hostDir, filepath.FromSlash(relativePath)) - if err := ensureSubpath(hostDir, targetPath); err != nil { - return err - } - - switch header.Typeflag { - case tar.TypeDir: - if err := os.MkdirAll(targetPath, 0o755); err != nil { - return fmt.Errorf("failed to create dir %s: %w", targetPath, err) - } - case tar.TypeReg: - if err := os.MkdirAll(filepath.Dir(targetPath), 0o755); err != nil { - return fmt.Errorf("failed to create parent dir for %s: %w", targetPath, err) - } - file, createErr := os.OpenFile(targetPath, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, header.FileInfo().Mode().Perm()) - if createErr != nil { - return fmt.Errorf("failed to create file %s: %w", targetPath, createErr) - } - //nolint:gosec // G110: source is Docker daemon tar stream from local test containers - if _, copyErr := io.Copy(file, tarReader); copyErr != nil { - _ = file.Close() - return fmt.Errorf("failed to write file %s: %w", targetPath, copyErr) - } - if closeErr := file.Close(); closeErr != nil { - return fmt.Errorf("failed to close file %s: %w", targetPath, closeErr) - } - } - } -} - -func normalizeTarEntryPath(entryName, stripTopDir string) (string, bool) { - normalized := strings.TrimPrefix(entryName, "./") - normalized = path.Clean(normalized) - if normalized == "." || normalized == "/" { - return "", false - } - - if stripTopDir != "" { - stripTopDir = path.Clean(stripTopDir) - if normalized == stripTopDir { - return "", false - } - prefix := stripTopDir + "/" - normalized = strings.TrimPrefix(normalized, prefix) - } - - normalized = strings.TrimPrefix(normalized, "/") - if normalized == "" { - return "", false - } - return normalized, true -} - -func ensureSubpath(baseDir, targetPath string) error { - baseAbs, err := filepath.Abs(baseDir) - if err != nil { - return fmt.Errorf("failed to resolve absolute path for %s: %w", baseDir, err) - } - targetAbs, err := filepath.Abs(targetPath) - if err != nil { - return fmt.Errorf("failed to resolve absolute path for %s: %w", targetPath, err) - } - prefix := baseAbs + string(filepath.Separator) - if targetAbs != baseAbs && !strings.HasPrefix(targetAbs, prefix) { - return fmt.Errorf("unsafe path detected outside destination: %s", targetPath) - } - return nil -} - // SearchLogFile searches logfile using regex and return matches or error func SearchLogFile(fp string, regex string) ([]string, error) { file, err := os.Open(fp) diff --git a/framework/leak/detector_cl_node.go b/framework/leak/detector_cl_node.go index 7c8df0414..d356d7861 100644 --- a/framework/leak/detector_cl_node.go +++ b/framework/leak/detector_cl_node.go @@ -270,7 +270,7 @@ func (cd *CLNodesLeakDetector) Check(t *CLNodesCheck) error { ctx, cancel := context.WithTimeout(context.Background(), DefaultNodeProfileDumpTimeout) defer cancel() - if err := DumpNodeProfiles(ctx, cd.nodesetName, DefaultAdminProfilesDir); err != nil { + if err := DumpNodeProfiles(ctx, cd.nodesetName+"-node", DefaultAdminProfilesDir); err != nil { framework.L.Error().Err(err).Msg("Failed to dump node profiles") errs = append(errs, fmt.Errorf("failed to dump node profiles: %w", err)) } diff --git a/framework/leak/node_dumper.go b/framework/leak/node_dumper.go index 0d6a7a982..143bd31d0 100644 --- a/framework/leak/node_dumper.go +++ b/framework/leak/node_dumper.go @@ -24,7 +24,7 @@ const ( ) // DumpNodeProfiles runs chainlink profile collection in each running container -// with a name containing namePattern and copies ./profiles content to dst/profile-. +// with a name containing namePattern and saves ./profiles as dst/profile-.tar. func DumpNodeProfiles(ctx context.Context, namePattern, dst string) error { f.L.Info(). Str("NamePattern", namePattern). @@ -65,11 +65,7 @@ func DumpNodeProfiles(ctx context.Context, namePattern, dst string) error { // Keep destination names safe and filesystem-friendly. safeName := containerNameSanitizer.ReplaceAllString(c.name, "_") - targetDir := filepath.Join(dst, fmt.Sprintf("profile-%s", safeName)) - if err := os.MkdirAll(targetDir, 0o755); err != nil { - errs = append(errs, fmt.Errorf("failed to create destination directory for %s: %w", c.name, err)) - continue - } + targetArchivePath := filepath.Join(dst, fmt.Sprintf("profile-%s.tar", safeName)) f.L.Info().Str("ContainerName", c.name).Msg("Collecting node profile") @@ -84,12 +80,12 @@ func DumpNodeProfiles(ctx context.Context, namePattern, dst string) error { } profilesPath := path.Clean(path.Join(c.workingDir, "profiles")) - if copyErr := dc.CopyFromContainerToHostWithContext(ctx, c.name, profilesPath, targetDir); copyErr != nil { - errs = append(errs, fmt.Errorf("failed to copy profiles from container %s to %s: %w", c.name, targetDir, copyErr)) + if copyErr := dc.CopyFromContainerToTarWithContext(ctx, c.name, profilesPath, targetArchivePath); copyErr != nil { + errs = append(errs, fmt.Errorf("failed to copy profiles archive from container %s to %s: %w", c.name, targetArchivePath, copyErr)) continue } - f.L.Info().Str("ContainerName", c.name).Str("Destination", targetDir).Msg("Profiles copied") + f.L.Info().Str("ContainerName", c.name).Str("Destination", targetArchivePath).Msg("Profiles copied as archive") } return errors.Join(errs...) From 85d1ffc7fadabf022288c40297a0ce06263a6975 Mon Sep 17 00:00:00 2001 From: Bartek Tofel Date: Tue, 14 Apr 2026 16:15:33 +0200 Subject: [PATCH 5/9] do not configure Pyroscpe by default; login as admin before dumping profiles --- framework/components/clnode/default.go | 5 -- framework/leak/node_dumper.go | 72 ++++++++++++++++++++++++++ 2 files changed, 72 insertions(+), 5 deletions(-) diff --git a/framework/components/clnode/default.go b/framework/components/clnode/default.go index ffb8a4f6f..a03277c80 100644 --- a/framework/components/clnode/default.go +++ b/framework/components/clnode/default.go @@ -19,11 +19,6 @@ const defaultConfigTmpl = ` [Log] Level = 'debug' -[Pyroscope] -ServerAddress = 'http://pyroscope:4040' -Environment = 'local' -LinkTracesToProfiles = true - [WebServer] HTTPWriteTimeout = '30s' SecureCookies = false diff --git a/framework/leak/node_dumper.go b/framework/leak/node_dumper.go index 143bd31d0..cdc8e602a 100644 --- a/framework/leak/node_dumper.go +++ b/framework/leak/node_dumper.go @@ -4,6 +4,7 @@ import ( "context" "errors" "fmt" + "io" "os" "path" "path/filepath" @@ -14,6 +15,7 @@ import ( "github.com/docker/docker/api/types/container" "github.com/docker/docker/client" f "github.com/smartcontractkit/chainlink-testing-framework/framework" + "github.com/smartcontractkit/chainlink-testing-framework/framework/components/clnode" ) var containerNameSanitizer = regexp.MustCompile(`[^a-zA-Z0-9._-]`) @@ -67,6 +69,10 @@ func DumpNodeProfiles(ctx context.Context, namePattern, dst string) error { safeName := containerNameSanitizer.ReplaceAllString(c.name, "_") targetArchivePath := filepath.Join(dst, fmt.Sprintf("profile-%s.tar", safeName)) + if err := loginCLINodeAdmin(ctx, cli, c); err != nil { + return err + } + f.L.Info().Str("ContainerName", c.name).Msg("Collecting node profile") out, execErr := dc.ExecContainerWithContext( @@ -92,6 +98,7 @@ func DumpNodeProfiles(ctx context.Context, namePattern, dst string) error { } type runningContainer struct { + id string name string workingDir string } @@ -118,6 +125,7 @@ func runningContainers(ctx context.Context, cli *client.Client) ([]runningContai workingDir = inspect.Config.WorkingDir } res = append(res, runningContainer{ + id: c.ID, name: name, workingDir: workingDir, }) @@ -134,3 +142,67 @@ func firstContainerName(names []string) string { } return "" } + +func loginCLINodeAdmin(ctx context.Context, cli *client.Client, c runningContainer) error { + credsPath := path.Clean(path.Join(c.workingDir, "creds.txt")) + createCredsCmd := []string{ + "sh", + "-lc", + fmt.Sprintf( + "printf '%%s\\n%%s\\n' %s %s > %s", + shellQuote(clnode.DefaultAPIUser), + shellQuote(clnode.DefaultAPIPassword), + shellQuote(credsPath), + ), + } + createOut, createExit, createErr := execContainerWithExitCode(ctx, cli, c.id, createCredsCmd) + if createErr != nil { + return fmt.Errorf("failed to create creds.txt in container %s: %w, output: %s", c.name, createErr, strings.TrimSpace(createOut)) + } + if createExit != 0 { + return fmt.Errorf("failed to create creds.txt in container %s: exit code %d, output: %s", c.name, createExit, strings.TrimSpace(createOut)) + } + + loginCmd := []string{"chainlink", "admin", "login", "-f", credsPath, "--bypass-version-check"} + loginOut, loginExit, loginErr := execContainerWithExitCode(ctx, cli, c.id, loginCmd) + if loginErr != nil { + return fmt.Errorf("failed to login admin via CLI for container %s: %w, output: %s", c.name, loginErr, strings.TrimSpace(loginOut)) + } + if loginExit != 0 { + return fmt.Errorf("failed to login admin via CLI for container %s: exit code %d, output: %s", c.name, loginExit, strings.TrimSpace(loginOut)) + } + return nil +} + +func execContainerWithExitCode(ctx context.Context, cli *client.Client, containerID string, cmd []string) (string, int, error) { + execResp, err := cli.ContainerExecCreate(ctx, containerID, container.ExecOptions{ + Cmd: cmd, + AttachStdout: true, + AttachStderr: true, + }) + if err != nil { + return "", -1, fmt.Errorf("failed to create exec in container %s: %w", containerID, err) + } + + attachResp, err := cli.ContainerExecAttach(ctx, execResp.ID, container.ExecStartOptions{}) + if err != nil { + return "", -1, fmt.Errorf("failed to attach to exec in container %s: %w", containerID, err) + } + defer attachResp.Close() + + outBytes, err := io.ReadAll(attachResp.Reader) + if err != nil { + return "", -1, fmt.Errorf("failed to read exec output in container %s: %w", containerID, err) + } + output := string(outBytes) + + execInspect, err := cli.ContainerExecInspect(ctx, execResp.ID) + if err != nil { + return output, -1, fmt.Errorf("failed to inspect exec in container %s: %w", containerID, err) + } + return output, execInspect.ExitCode, nil +} + +func shellQuote(s string) string { + return "'" + strings.ReplaceAll(s, "'", `'"'"'`) + "'" +} From daef07082e70cb978855b28e37b418091a32160c Mon Sep 17 00:00:00 2001 From: Bartek Tofel Date: Tue, 14 Apr 2026 17:33:58 +0200 Subject: [PATCH 6/9] do not check memory use as a temp hack --- framework/leak/detector_cl_node.go | 240 ++++++++++++++--------------- 1 file changed, 120 insertions(+), 120 deletions(-) diff --git a/framework/leak/detector_cl_node.go b/framework/leak/detector_cl_node.go index d356d7861..885d29801 100644 --- a/framework/leak/detector_cl_node.go +++ b/framework/leak/detector_cl_node.go @@ -139,134 +139,134 @@ func (cd *CLNodesLeakDetector) checkContainerUptime(t *CLNodesCheck, nodeIdx int // Check runs all resource leak checks and returns errors if threshold reached for any of them func (cd *CLNodesLeakDetector) Check(t *CLNodesCheck) error { - if t.NumNodes == 0 { - return fmt.Errorf("cl nodes num must be > 0") - } - memMeasurements := make([]*Measurement, 0) - cpuMeasurements := make([]*Measurement, 0) - uptimes := make([]float64, 0) + // if t.NumNodes == 0 { + // return fmt.Errorf("cl nodes num must be > 0") + // } + // memMeasurements := make([]*Measurement, 0) + // cpuMeasurements := make([]*Measurement, 0) + // uptimes := make([]float64, 0) errs := make([]error, 0) - for i := range t.NumNodes { + // for i := range t.NumNodes { - switch t.ComparisonMode { - case ComparisonModePercentage: - fallthrough - case ComparisonModeDiff: - memMeasurement, err := cd.c.MeasureDelta(&CheckConfig{ - ComparisonMode: t.ComparisonMode, - Query: fmt.Sprintf(cd.MemoryQuery, i), - Start: t.Start, - End: t.End, - WarmUpDuration: t.WarmUpDuration, - }) - if err != nil { - return fmt.Errorf("memory leak check failed: %w", err) - } - memMeasurements = append(memMeasurements, memMeasurement) + // switch t.ComparisonMode { + // case ComparisonModePercentage: + // fallthrough + // case ComparisonModeDiff: + // memMeasurement, err := cd.c.MeasureDelta(&CheckConfig{ + // ComparisonMode: t.ComparisonMode, + // Query: fmt.Sprintf(cd.MemoryQuery, i), + // Start: t.Start, + // End: t.End, + // WarmUpDuration: t.WarmUpDuration, + // }) + // if err != nil { + // return fmt.Errorf("memory leak check failed: %w", err) + // } + // memMeasurements = append(memMeasurements, memMeasurement) - cpuMeasurement, err := cd.c.MeasureDelta(&CheckConfig{ - ComparisonMode: t.ComparisonMode, - Query: fmt.Sprintf(cd.CPUQuery, i), - Start: t.Start, - End: t.End, - WarmUpDuration: t.WarmUpDuration, - }) - if err != nil { - return fmt.Errorf("cpu leak check failed: %w", err) - } - cpuMeasurements = append(cpuMeasurements, cpuMeasurement) + // cpuMeasurement, err := cd.c.MeasureDelta(&CheckConfig{ + // ComparisonMode: t.ComparisonMode, + // Query: fmt.Sprintf(cd.CPUQuery, i), + // Start: t.Start, + // End: t.End, + // WarmUpDuration: t.WarmUpDuration, + // }) + // if err != nil { + // return fmt.Errorf("cpu leak check failed: %w", err) + // } + // cpuMeasurements = append(cpuMeasurements, cpuMeasurement) - if memMeasurement.Delta >= t.MemoryThreshold { - errs = append(errs, fmt.Errorf( - "Memory leak detected for node %d and interval: [%s -> %s], diff: %.f, comparison mode: %s", - i, t.Start, t.End, memMeasurement.Delta, t.ComparisonMode, - )) - } - if cpuMeasurement.Delta >= t.CPUThreshold { - errs = append(errs, fmt.Errorf( - "CPU leak detected for node %d and interval: [%s -> %s], diff: %.f, comparison mode: %s", - i, t.Start, t.End, cpuMeasurement.Delta, t.ComparisonMode, - )) - } - case ComparisonModeAbsolute: - memMeasurement, err := cd.c.MeasureDelta(&CheckConfig{ - ComparisonMode: t.ComparisonMode, - Query: fmt.Sprintf(cd.MemoryQueryAbsolute, i), - Start: t.Start, - End: t.End, - WarmUpDuration: t.WarmUpDuration, - }) - if err != nil { - return fmt.Errorf("memory leak check failed: %w", err) - } - memMeasurements = append(memMeasurements, memMeasurement) + // if memMeasurement.Delta >= t.MemoryThreshold { + // errs = append(errs, fmt.Errorf( + // "Memory leak detected for node %d and interval: [%s -> %s], diff: %.f, comparison mode: %s", + // i, t.Start, t.End, memMeasurement.Delta, t.ComparisonMode, + // )) + // } + // if cpuMeasurement.Delta >= t.CPUThreshold { + // errs = append(errs, fmt.Errorf( + // "CPU leak detected for node %d and interval: [%s -> %s], diff: %.f, comparison mode: %s", + // i, t.Start, t.End, cpuMeasurement.Delta, t.ComparisonMode, + // )) + // } + // case ComparisonModeAbsolute: + // memMeasurement, err := cd.c.MeasureDelta(&CheckConfig{ + // ComparisonMode: t.ComparisonMode, + // Query: fmt.Sprintf(cd.MemoryQueryAbsolute, i), + // Start: t.Start, + // End: t.End, + // WarmUpDuration: t.WarmUpDuration, + // }) + // if err != nil { + // return fmt.Errorf("memory leak check failed: %w", err) + // } + // memMeasurements = append(memMeasurements, memMeasurement) - cpuMeasurement, err := cd.c.MeasureDelta(&CheckConfig{ - ComparisonMode: t.ComparisonMode, - Query: fmt.Sprintf(cd.CPUQueryAbsolute, i), - Start: t.Start, - End: t.End, - WarmUpDuration: t.WarmUpDuration, - }) - if err != nil { - return fmt.Errorf("cpu leak check failed: %w", err) - } - cpuMeasurements = append(cpuMeasurements, cpuMeasurement) - if memMeasurement.End >= t.MemoryThreshold { - errs = append(errs, fmt.Errorf( - "Memory leak detected for node %d and interval: [%s -> %s], diff: %.f, comparison mode: %s", - i, t.Start, t.End, memMeasurement.End, t.ComparisonMode, - )) - } - if cpuMeasurement.End >= t.CPUThreshold { - errs = append(errs, fmt.Errorf( - "CPU leak detected for node %d and interval: [%s -> %s], diff: %.f, comparison mode: %s", - i, t.Start, t.End, cpuMeasurement.End, t.ComparisonMode, - )) - } - default: - return fmt.Errorf("comparison mode is incorrect: %s, see available leak.ComparisonMode constants", t.ComparisonMode) - } + // cpuMeasurement, err := cd.c.MeasureDelta(&CheckConfig{ + // ComparisonMode: t.ComparisonMode, + // Query: fmt.Sprintf(cd.CPUQueryAbsolute, i), + // Start: t.Start, + // End: t.End, + // WarmUpDuration: t.WarmUpDuration, + // }) + // if err != nil { + // return fmt.Errorf("cpu leak check failed: %w", err) + // } + // cpuMeasurements = append(cpuMeasurements, cpuMeasurement) + // if memMeasurement.End >= t.MemoryThreshold { + // errs = append(errs, fmt.Errorf( + // "Memory leak detected for node %d and interval: [%s -> %s], diff: %.f, comparison mode: %s", + // i, t.Start, t.End, memMeasurement.End, t.ComparisonMode, + // )) + // } + // if cpuMeasurement.End >= t.CPUThreshold { + // errs = append(errs, fmt.Errorf( + // "CPU leak detected for node %d and interval: [%s -> %s], diff: %.f, comparison mode: %s", + // i, t.Start, t.End, cpuMeasurement.End, t.ComparisonMode, + // )) + // } + // default: + // return fmt.Errorf("comparison mode is incorrect: %s, see available leak.ComparisonMode constants", t.ComparisonMode) + // } - uptime, err := cd.checkContainerUptime(t, i) - if err != nil { - errs = append(errs, fmt.Errorf( - "Container uptime issue for node %d and interval: [%s -> %s], uptime: %.f, err: %w", - i, t.Start, t.End, uptime, err, - )) - } - uptimes = append(uptimes, uptime) - } - framework.L.Info(). - Any("MemoryDiffs", memMeasurements). - Any("CPUDiffs", cpuMeasurements). - Any("Uptimes", uptimes). - Str("TestDuration", t.End.Sub(t.Start).String()). - Float64("TestDurationSec", t.End.Sub(t.Start).Seconds()). - Msg("Leaks info") + // uptime, err := cd.checkContainerUptime(t, i) + // if err != nil { + // errs = append(errs, fmt.Errorf( + // "Container uptime issue for node %d and interval: [%s -> %s], uptime: %.f, err: %w", + // i, t.Start, t.End, uptime, err, + // )) + // } + // uptimes = append(uptimes, uptime) + // } + // framework.L.Info(). + // Any("MemoryDiffs", memMeasurements). + // Any("CPUDiffs", cpuMeasurements). + // Any("Uptimes", uptimes). + // Str("TestDuration", t.End.Sub(t.Start).String()). + // Float64("TestDurationSec", t.End.Sub(t.Start).Seconds()). + // Msg("Leaks info") - profilesToDump := []string{DefaultProfileType, "memory:inuse_space:bytes:space:bytes"} - framework.L.Info().Msgf("Downloading %d pprof profiles..", len(profilesToDump)) - dumper := NewProfileDumper(framework.LocalPyroscopeBaseURL) + // profilesToDump := []string{DefaultProfileType, "memory:inuse_space:bytes:space:bytes"} + // framework.L.Info().Msgf("Downloading %d pprof profiles..", len(profilesToDump)) + // dumper := NewProfileDumper(framework.LocalPyroscopeBaseURL) - for _, profileType := range profilesToDump { - profileSplit := strings.Split(profileType, ":") - outputPath := DefaultOutputPath - if len(profileSplit) > 1 { - // e.g. for "memory:inuse_space:bytes:space:bytes" we want to have output file "memory-inuse_space.pprof" - outputPath = fmt.Sprintf("%s-%s.pprof", profileSplit[0], profileSplit[1]) - } - profilePath, err := dumper.MemoryProfile(&ProfileDumperConfig{ - ServiceName: "chainlink-node", - ProfileType: profileType, - OutputPath: outputPath, - }) - if err != nil { - errs = append(errs, fmt.Errorf("failed to download Pyroscope profile %s: %w", profileType, err)) - return errors.Join(errs...) - } - framework.L.Info().Str("Path", profilePath).Str("ProfileType", profileType).Msg("Saved pprof profile") - } + // for _, profileType := range profilesToDump { + // profileSplit := strings.Split(profileType, ":") + // outputPath := DefaultOutputPath + // if len(profileSplit) > 1 { + // // e.g. for "memory:inuse_space:bytes:space:bytes" we want to have output file "memory-inuse_space.pprof" + // outputPath = fmt.Sprintf("%s-%s.pprof", profileSplit[0], profileSplit[1]) + // } + // profilePath, err := dumper.MemoryProfile(&ProfileDumperConfig{ + // ServiceName: "chainlink-node", + // ProfileType: profileType, + // OutputPath: outputPath, + // }) + // if err != nil { + // errs = append(errs, fmt.Errorf("failed to download Pyroscope profile %s: %w", profileType, err)) + // return errors.Join(errs...) + // } + // framework.L.Info().Str("Path", profilePath).Str("ProfileType", profileType).Msg("Saved pprof profile") + // } ctx, cancel := context.WithTimeout(context.Background(), DefaultNodeProfileDumpTimeout) defer cancel() From ca58bb8b91dcac7550d1e6031e5da8634927d9ed Mon Sep 17 00:00:00 2001 From: Bartek Tofel Date: Tue, 14 Apr 2026 17:40:35 +0200 Subject: [PATCH 7/9] either dump admin profiles or pyroscope --- framework/leak/detector_cl_node.go | 285 ++++++++++++++++------------- 1 file changed, 159 insertions(+), 126 deletions(-) diff --git a/framework/leak/detector_cl_node.go b/framework/leak/detector_cl_node.go index 885d29801..5d8abef9c 100644 --- a/framework/leak/detector_cl_node.go +++ b/framework/leak/detector_cl_node.go @@ -44,7 +44,9 @@ type CLNodesLeakDetector struct { ContainerAliveQuery string c *ResourceLeakChecker - nodesetName string + nodesetName string + dumpPyroscopeProfiles bool + dumpAdminProfiles bool } // WithCPUQuery allows to override CPU leak query (Prometheus) @@ -71,6 +73,22 @@ func WithNodesetName(name string) func(*CLNodesLeakDetector) { } } +// WithDumpPyroscopeProfiles allows to dump Pyroscope profiles for each node at the end of the test. +// Dumped profiles are aggragate (cumulative) profiles from the whole test duration. +func WithDumpPyroscopeProfiles(dump bool) func(*CLNodesLeakDetector) { + return func(cd *CLNodesLeakDetector) { + cd.dumpPyroscopeProfiles = dump + } +} + +// WithDumpAdminProfiles allows to dump admin profiles for each node at the end of the test. +// Uses CL node's debug endpoint to fetch pprof snapshots. +func WithDumpAdminProfiles(dump bool) func(*CLNodesLeakDetector) { + return func(cd *CLNodesLeakDetector) { + cd.dumpAdminProfiles = dump + } +} + // sanitizeNodesetName escapes characters that would corrupt fmt.Sprintf format strings // or invalidate PromQL double-quoted label literals. func sanitizeNodesetName(name string) string { @@ -109,6 +127,15 @@ func NewCLNodesLeakDetector(c *ResourceLeakChecker, opts ...func(*CLNodesLeakDet cd.MemoryQueryAbsolute = replaceNodeset(cd.MemoryQueryAbsolute) } + if cd.dumpPyroscopeProfiles == true && cd.dumpAdminProfiles == true { + return nil, fmt.Errorf("both Pyroscope and admin profile dumping enabled, please choose only one. Dumping admin profiles will fail if Pyroscope is enabled.") + } + + if cd.dumpAdminProfiles == false && cd.dumpPyroscopeProfiles == false { + // default to dumping admin profiles since that's what engineers prefer + cd.dumpAdminProfiles = true + } + return cd, nil } @@ -139,140 +166,146 @@ func (cd *CLNodesLeakDetector) checkContainerUptime(t *CLNodesCheck, nodeIdx int // Check runs all resource leak checks and returns errors if threshold reached for any of them func (cd *CLNodesLeakDetector) Check(t *CLNodesCheck) error { - // if t.NumNodes == 0 { - // return fmt.Errorf("cl nodes num must be > 0") - // } - // memMeasurements := make([]*Measurement, 0) - // cpuMeasurements := make([]*Measurement, 0) - // uptimes := make([]float64, 0) + if t.NumNodes == 0 { + return fmt.Errorf("cl nodes num must be > 0") + } + memMeasurements := make([]*Measurement, 0) + cpuMeasurements := make([]*Measurement, 0) + uptimes := make([]float64, 0) errs := make([]error, 0) - // for i := range t.NumNodes { + for i := range t.NumNodes { - // switch t.ComparisonMode { - // case ComparisonModePercentage: - // fallthrough - // case ComparisonModeDiff: - // memMeasurement, err := cd.c.MeasureDelta(&CheckConfig{ - // ComparisonMode: t.ComparisonMode, - // Query: fmt.Sprintf(cd.MemoryQuery, i), - // Start: t.Start, - // End: t.End, - // WarmUpDuration: t.WarmUpDuration, - // }) - // if err != nil { - // return fmt.Errorf("memory leak check failed: %w", err) - // } - // memMeasurements = append(memMeasurements, memMeasurement) + switch t.ComparisonMode { + case ComparisonModePercentage: + fallthrough + case ComparisonModeDiff: + memMeasurement, err := cd.c.MeasureDelta(&CheckConfig{ + ComparisonMode: t.ComparisonMode, + Query: fmt.Sprintf(cd.MemoryQuery, i), + Start: t.Start, + End: t.End, + WarmUpDuration: t.WarmUpDuration, + }) + if err != nil { + return fmt.Errorf("memory leak check failed: %w", err) + } + memMeasurements = append(memMeasurements, memMeasurement) - // cpuMeasurement, err := cd.c.MeasureDelta(&CheckConfig{ - // ComparisonMode: t.ComparisonMode, - // Query: fmt.Sprintf(cd.CPUQuery, i), - // Start: t.Start, - // End: t.End, - // WarmUpDuration: t.WarmUpDuration, - // }) - // if err != nil { - // return fmt.Errorf("cpu leak check failed: %w", err) - // } - // cpuMeasurements = append(cpuMeasurements, cpuMeasurement) + cpuMeasurement, err := cd.c.MeasureDelta(&CheckConfig{ + ComparisonMode: t.ComparisonMode, + Query: fmt.Sprintf(cd.CPUQuery, i), + Start: t.Start, + End: t.End, + WarmUpDuration: t.WarmUpDuration, + }) + if err != nil { + return fmt.Errorf("cpu leak check failed: %w", err) + } + cpuMeasurements = append(cpuMeasurements, cpuMeasurement) - // if memMeasurement.Delta >= t.MemoryThreshold { - // errs = append(errs, fmt.Errorf( - // "Memory leak detected for node %d and interval: [%s -> %s], diff: %.f, comparison mode: %s", - // i, t.Start, t.End, memMeasurement.Delta, t.ComparisonMode, - // )) - // } - // if cpuMeasurement.Delta >= t.CPUThreshold { - // errs = append(errs, fmt.Errorf( - // "CPU leak detected for node %d and interval: [%s -> %s], diff: %.f, comparison mode: %s", - // i, t.Start, t.End, cpuMeasurement.Delta, t.ComparisonMode, - // )) - // } - // case ComparisonModeAbsolute: - // memMeasurement, err := cd.c.MeasureDelta(&CheckConfig{ - // ComparisonMode: t.ComparisonMode, - // Query: fmt.Sprintf(cd.MemoryQueryAbsolute, i), - // Start: t.Start, - // End: t.End, - // WarmUpDuration: t.WarmUpDuration, - // }) - // if err != nil { - // return fmt.Errorf("memory leak check failed: %w", err) - // } - // memMeasurements = append(memMeasurements, memMeasurement) + if memMeasurement.Delta >= t.MemoryThreshold { + errs = append(errs, fmt.Errorf( + "Memory leak detected for node %d and interval: [%s -> %s], diff: %.f, comparison mode: %s", + i, t.Start, t.End, memMeasurement.Delta, t.ComparisonMode, + )) + } + if cpuMeasurement.Delta >= t.CPUThreshold { + errs = append(errs, fmt.Errorf( + "CPU leak detected for node %d and interval: [%s -> %s], diff: %.f, comparison mode: %s", + i, t.Start, t.End, cpuMeasurement.Delta, t.ComparisonMode, + )) + } + case ComparisonModeAbsolute: + memMeasurement, err := cd.c.MeasureDelta(&CheckConfig{ + ComparisonMode: t.ComparisonMode, + Query: fmt.Sprintf(cd.MemoryQueryAbsolute, i), + Start: t.Start, + End: t.End, + WarmUpDuration: t.WarmUpDuration, + }) + if err != nil { + return fmt.Errorf("memory leak check failed: %w", err) + } + memMeasurements = append(memMeasurements, memMeasurement) - // cpuMeasurement, err := cd.c.MeasureDelta(&CheckConfig{ - // ComparisonMode: t.ComparisonMode, - // Query: fmt.Sprintf(cd.CPUQueryAbsolute, i), - // Start: t.Start, - // End: t.End, - // WarmUpDuration: t.WarmUpDuration, - // }) - // if err != nil { - // return fmt.Errorf("cpu leak check failed: %w", err) - // } - // cpuMeasurements = append(cpuMeasurements, cpuMeasurement) - // if memMeasurement.End >= t.MemoryThreshold { - // errs = append(errs, fmt.Errorf( - // "Memory leak detected for node %d and interval: [%s -> %s], diff: %.f, comparison mode: %s", - // i, t.Start, t.End, memMeasurement.End, t.ComparisonMode, - // )) - // } - // if cpuMeasurement.End >= t.CPUThreshold { - // errs = append(errs, fmt.Errorf( - // "CPU leak detected for node %d and interval: [%s -> %s], diff: %.f, comparison mode: %s", - // i, t.Start, t.End, cpuMeasurement.End, t.ComparisonMode, - // )) - // } - // default: - // return fmt.Errorf("comparison mode is incorrect: %s, see available leak.ComparisonMode constants", t.ComparisonMode) - // } + cpuMeasurement, err := cd.c.MeasureDelta(&CheckConfig{ + ComparisonMode: t.ComparisonMode, + Query: fmt.Sprintf(cd.CPUQueryAbsolute, i), + Start: t.Start, + End: t.End, + WarmUpDuration: t.WarmUpDuration, + }) + if err != nil { + return fmt.Errorf("cpu leak check failed: %w", err) + } + cpuMeasurements = append(cpuMeasurements, cpuMeasurement) + if memMeasurement.End >= t.MemoryThreshold { + errs = append(errs, fmt.Errorf( + "Memory leak detected for node %d and interval: [%s -> %s], diff: %.f, comparison mode: %s", + i, t.Start, t.End, memMeasurement.End, t.ComparisonMode, + )) + } + if cpuMeasurement.End >= t.CPUThreshold { + errs = append(errs, fmt.Errorf( + "CPU leak detected for node %d and interval: [%s -> %s], diff: %.f, comparison mode: %s", + i, t.Start, t.End, cpuMeasurement.End, t.ComparisonMode, + )) + } + default: + return fmt.Errorf("comparison mode is incorrect: %s, see available leak.ComparisonMode constants", t.ComparisonMode) + } - // uptime, err := cd.checkContainerUptime(t, i) - // if err != nil { - // errs = append(errs, fmt.Errorf( - // "Container uptime issue for node %d and interval: [%s -> %s], uptime: %.f, err: %w", - // i, t.Start, t.End, uptime, err, - // )) - // } - // uptimes = append(uptimes, uptime) - // } - // framework.L.Info(). - // Any("MemoryDiffs", memMeasurements). - // Any("CPUDiffs", cpuMeasurements). - // Any("Uptimes", uptimes). - // Str("TestDuration", t.End.Sub(t.Start).String()). - // Float64("TestDurationSec", t.End.Sub(t.Start).Seconds()). - // Msg("Leaks info") + uptime, err := cd.checkContainerUptime(t, i) + if err != nil { + errs = append(errs, fmt.Errorf( + "Container uptime issue for node %d and interval: [%s -> %s], uptime: %.f, err: %w", + i, t.Start, t.End, uptime, err, + )) + } + uptimes = append(uptimes, uptime) + } + framework.L.Info(). + Any("MemoryDiffs", memMeasurements). + Any("CPUDiffs", cpuMeasurements). + Any("Uptimes", uptimes). + Str("TestDuration", t.End.Sub(t.Start).String()). + Float64("TestDurationSec", t.End.Sub(t.Start).Seconds()). + Msg("Leaks info") - // profilesToDump := []string{DefaultProfileType, "memory:inuse_space:bytes:space:bytes"} - // framework.L.Info().Msgf("Downloading %d pprof profiles..", len(profilesToDump)) - // dumper := NewProfileDumper(framework.LocalPyroscopeBaseURL) + if cd.dumpPyroscopeProfiles { + profilesToDump := []string{DefaultProfileType, "memory:inuse_space:bytes:space:bytes"} + framework.L.Info().Msgf("Downloading %d pprof profiles..", len(profilesToDump)) + dumper := NewProfileDumper(framework.LocalPyroscopeBaseURL) - // for _, profileType := range profilesToDump { - // profileSplit := strings.Split(profileType, ":") - // outputPath := DefaultOutputPath - // if len(profileSplit) > 1 { - // // e.g. for "memory:inuse_space:bytes:space:bytes" we want to have output file "memory-inuse_space.pprof" - // outputPath = fmt.Sprintf("%s-%s.pprof", profileSplit[0], profileSplit[1]) - // } - // profilePath, err := dumper.MemoryProfile(&ProfileDumperConfig{ - // ServiceName: "chainlink-node", - // ProfileType: profileType, - // OutputPath: outputPath, - // }) - // if err != nil { - // errs = append(errs, fmt.Errorf("failed to download Pyroscope profile %s: %w", profileType, err)) - // return errors.Join(errs...) - // } - // framework.L.Info().Str("Path", profilePath).Str("ProfileType", profileType).Msg("Saved pprof profile") - // } + for _, profileType := range profilesToDump { + profileSplit := strings.Split(profileType, ":") + outputPath := DefaultOutputPath + if len(profileSplit) > 1 { + // e.g. for "memory:inuse_space:bytes:space:bytes" we want to have output file "memory-inuse_space.pprof" + outputPath = fmt.Sprintf("%s-%s.pprof", profileSplit[0], profileSplit[1]) + } + profilePath, err := dumper.MemoryProfile(&ProfileDumperConfig{ + ServiceName: "chainlink-node", + ProfileType: profileType, + OutputPath: outputPath, + }) + if err != nil { + errs = append(errs, fmt.Errorf("failed to download Pyroscope profile %s: %w", profileType, err)) + return errors.Join(errs...) + } + framework.L.Info().Str("Path", profilePath).Str("ProfileType", profileType).Msg("Saved pprof profile") + } + } - ctx, cancel := context.WithTimeout(context.Background(), DefaultNodeProfileDumpTimeout) - defer cancel() - if err := DumpNodeProfiles(ctx, cd.nodesetName+"-node", DefaultAdminProfilesDir); err != nil { - framework.L.Error().Err(err).Msg("Failed to dump node profiles") - errs = append(errs, fmt.Errorf("failed to dump node profiles: %w", err)) + if cd.dumpAdminProfiles { + framework.L.Info().Msg("Dumping admin profiles..") + ctx, cancel := context.WithTimeout(context.Background(), DefaultNodeProfileDumpTimeout) + defer cancel() + if err := DumpNodeProfiles(ctx, cd.nodesetName+"-node", DefaultAdminProfilesDir); err != nil { + framework.L.Error().Err(err).Msg("Failed to dump node profiles") + errs = append(errs, fmt.Errorf("failed to dump node profiles: %w", err)) + } + framework.L.Info().Str("Path", DefaultAdminProfilesDir).Msg("Admin profiles dumped successfully") } return errors.Join(errs...) From cd082d2e583411191720fbced04b3b9c771be915 Mon Sep 17 00:00:00 2001 From: Bartek Tofel Date: Wed, 15 Apr 2026 08:43:25 +0200 Subject: [PATCH 8/9] temporarily bump http write timeout to 6m to allow big pprof dumping --- framework/components/clnode/default.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/framework/components/clnode/default.go b/framework/components/clnode/default.go index a03277c80..66aed9b0c 100644 --- a/framework/components/clnode/default.go +++ b/framework/components/clnode/default.go @@ -20,7 +20,7 @@ const defaultConfigTmpl = ` Level = 'debug' [WebServer] -HTTPWriteTimeout = '30s' +HTTPWriteTimeout = '360s' SecureCookies = false HTTPPort = {{.HTTPPort}} From 1d24b9f158ed93a221fd1415102d398d7b0f4498 Mon Sep 17 00:00:00 2001 From: Bartek Tofel Date: Thu, 16 Apr 2026 10:04:44 +0200 Subject: [PATCH 9/9] parallelise profile dumping --- framework/leak/node_dumper.go | 86 +++++++++++++++++++++++------------ 1 file changed, 56 insertions(+), 30 deletions(-) diff --git a/framework/leak/node_dumper.go b/framework/leak/node_dumper.go index cdc8e602a..285cc3307 100644 --- a/framework/leak/node_dumper.go +++ b/framework/leak/node_dumper.go @@ -10,12 +10,14 @@ import ( "path/filepath" "regexp" "strings" + "sync" "time" "github.com/docker/docker/api/types/container" "github.com/docker/docker/client" f "github.com/smartcontractkit/chainlink-testing-framework/framework" "github.com/smartcontractkit/chainlink-testing-framework/framework/components/clnode" + "golang.org/x/sync/errgroup" ) var containerNameSanitizer = regexp.MustCompile(`[^a-zA-Z0-9._-]`) @@ -23,6 +25,8 @@ var containerNameSanitizer = regexp.MustCompile(`[^a-zA-Z0-9._-]`) const ( DefaultAdminProfilesDir = "admin-profiles" DefaultNodeProfileDumpTimeout = 5 * time.Minute + // maxConcurrentNodeProfileDumps limits parallel Docker exec/copy work so the daemon is not flooded. + maxConcurrentNodeProfileDumps = 4 ) // DumpNodeProfiles runs chainlink profile collection in each running container @@ -59,41 +63,63 @@ func DumpNodeProfiles(ctx context.Context, namePattern, dst string) error { return err } - var errs []error + targets := make([]runningContainer, 0) for _, c := range containers { - if !strings.Contains(c.name, namePattern) { - continue - } - - // Keep destination names safe and filesystem-friendly. - safeName := containerNameSanitizer.ReplaceAllString(c.name, "_") - targetArchivePath := filepath.Join(dst, fmt.Sprintf("profile-%s.tar", safeName)) - - if err := loginCLINodeAdmin(ctx, cli, c); err != nil { - return err - } - - f.L.Info().Str("ContainerName", c.name).Msg("Collecting node profile") - - out, execErr := dc.ExecContainerWithContext( - ctx, - c.name, - []string{"chainlink", "admin", "profile", "-seconds", "1", "-output_dir", "./profiles"}, - ) - if execErr != nil { - errs = append(errs, fmt.Errorf("failed to execute profile command in container %s: %w, output: %s", c.name, execErr, strings.TrimSpace(out))) - continue - } - - profilesPath := path.Clean(path.Join(c.workingDir, "profiles")) - if copyErr := dc.CopyFromContainerToTarWithContext(ctx, c.name, profilesPath, targetArchivePath); copyErr != nil { - errs = append(errs, fmt.Errorf("failed to copy profiles archive from container %s to %s: %w", c.name, targetArchivePath, copyErr)) - continue + if strings.Contains(c.name, namePattern) { + targets = append(targets, c) } + } - f.L.Info().Str("ContainerName", c.name).Str("Destination", targetArchivePath).Msg("Profiles copied as archive") + var ( + mu sync.Mutex + errs []error + ) + g := new(errgroup.Group) + g.SetLimit(maxConcurrentNodeProfileDumps) + + for _, c := range targets { + c := c + g.Go(func() error { + safeName := containerNameSanitizer.ReplaceAllString(c.name, "_") + targetArchivePath := filepath.Join(dst, fmt.Sprintf("profile-%s.tar", safeName)) + + if err := loginCLINodeAdmin(ctx, cli, c); err != nil { + mu.Lock() + errs = append(errs, err) + mu.Unlock() + return nil + } + + f.L.Info().Str("ContainerName", c.name).Msg("Collecting node profile") + + out, execErr := dc.ExecContainerWithContext( + ctx, + c.name, + []string{"chainlink", "admin", "profile", "-seconds", "1", "-output_dir", "./profiles"}, + ) + if execErr != nil { + mu.Lock() + errs = append(errs, fmt.Errorf("failed to execute profile command in container %s: %w, output: %s", c.name, execErr, strings.TrimSpace(out))) + mu.Unlock() + return nil + } + + profilesPath := path.Clean(path.Join(c.workingDir, "profiles")) + if copyErr := dc.CopyFromContainerToTarWithContext(ctx, c.name, profilesPath, targetArchivePath); copyErr != nil { + mu.Lock() + errs = append(errs, fmt.Errorf("failed to copy profiles archive from container %s to %s: %w", c.name, targetArchivePath, copyErr)) + mu.Unlock() + return nil + } + + f.L.Info().Str("ContainerName", c.name).Str("Destination", targetArchivePath).Msg("Profiles copied as archive") + return nil + }) } + if err := g.Wait(); err != nil { + return err + } return errors.Join(errs...) }