Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions framework/.changeset/v0.15.16.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
- Dump two memory profiles: inuse and alloc
- Dump LOOPs profiles via admin command
7 changes: 1 addition & 6 deletions framework/components/clnode/default.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,8 @@ const defaultConfigTmpl = `
[Log]
Level = 'debug'

[Pyroscope]
ServerAddress = 'http://pyroscope:4040'
Environment = 'local'
LinkTracesToProfiles = true
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When Pyroscope is configured, dumping admin profiles fails with the following error: `error collecting /v2/debug/pprof/profile?seconds=1: status 500: Could not enable CPU profiling: cpu profiling already in use`.


[WebServer]
HTTPWriteTimeout = '30s'
HTTPWriteTimeout = '360s'
SecureCookies = false
HTTPPort = {{.HTTPPort}}

Expand Down
41 changes: 41 additions & 0 deletions framework/docker.go
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,47 @@ func (dc *DockerClient) CopyFile(containerName, sourceFile, targetPath string) e
return dc.copyToContainer(containerID, sourceFile, targetPath)
}

// CopyFromContainer is the context-free variant of CopyFromContainerWithContext:
// it performs the same copy using context.Background() and returns the resulting
// tar archive stream, the path stat and any error unchanged.
func (dc *DockerClient) CopyFromContainer(containerName, sourcePath string) (io.ReadCloser, container.PathStat, error) {
	ctx := context.Background()
	return dc.CopyFromContainerWithContext(ctx, containerName, sourcePath)
}

// CopyFromContainerWithContext copies sourcePath out of the container named
// containerName and returns the tar archive stream produced by the Docker client,
// together with the container.PathStat it reports for that path.
//
// The returned reader must be closed by the caller.
//
// An error is returned when the container cannot be resolved by name or when the
// copy request itself fails. In both cases the underlying error is wrapped with %w
// so it stays reachable through errors.Is / errors.As.
func (dc *DockerClient) CopyFromContainerWithContext(ctx context.Context, containerName, sourcePath string) (io.ReadCloser, container.PathStat, error) {
	containerID, err := dc.findContainerIDByName(ctx, containerName)
	if err != nil {
		// Wrap the lookup error instead of dropping it, so the root cause is not lost.
		return nil, container.PathStat{}, fmt.Errorf("failed to find container ID by name: %s: %w", containerName, err)
	}
	reader, stat, err := dc.cli.CopyFromContainer(ctx, containerID, sourcePath)
	if err != nil {
		return nil, container.PathStat{}, fmt.Errorf("could not copy from container %s path %s: %w", containerName, sourcePath, err)
	}
	return reader, stat, nil
}

// CopyFromContainerToTarWithContext copies sourcePath out of the container named
// containerName and writes the raw Docker tar stream to targetTarPath.
//
// The parent directory of targetTarPath is created (mode 0o755) if it does not
// exist, and the target file is created or truncated via os.Create.
//
// An error is returned if the copy from the container fails, if the destination
// directory or file cannot be created, if streaming the archive fails, or if the
// destination file cannot be closed cleanly.
func (dc *DockerClient) CopyFromContainerToTarWithContext(ctx context.Context, containerName, sourcePath, targetTarPath string) error {
	reader, _, err := dc.CopyFromContainerWithContext(ctx, containerName, sourcePath)
	if err != nil {
		return err
	}
	defer reader.Close()

	if err := os.MkdirAll(filepath.Dir(targetTarPath), 0o755); err != nil {
		return fmt.Errorf("failed to create destination directory for %s: %w", targetTarPath, err)
	}
	file, err := os.Create(targetTarPath)
	if err != nil {
		return fmt.Errorf("failed to create destination archive %s: %w", targetTarPath, err)
	}

	if _, err := io.Copy(file, reader); err != nil {
		// The copy error is the one worth reporting; the close error is deliberately ignored here.
		_ = file.Close()
		return fmt.Errorf("failed to write archive %s: %w", targetTarPath, err)
	}
	// Close explicitly rather than via defer: a failed close on a written file can
	// mean data was not persisted, so it must not be silently discarded.
	if err := file.Close(); err != nil {
		return fmt.Errorf("failed to close archive %s: %w", targetTarPath, err)
	}
	return nil
}

// findContainerIDByName finds a container ID by its name
func (dc *DockerClient) findContainerIDByName(ctx context.Context, containerName string) (string, error) {
containers, err := dc.cli.ContainerList(ctx, container.ListOptions{
Expand Down
75 changes: 65 additions & 10 deletions framework/leak/detector_cl_node.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package leak

import (
"context"
"errors"
"fmt"
"strconv"
Expand Down Expand Up @@ -43,7 +44,9 @@ type CLNodesLeakDetector struct {
ContainerAliveQuery string
c *ResourceLeakChecker

nodesetName string
nodesetName string
dumpPyroscopeProfiles bool
dumpAdminProfiles bool
}

// WithCPUQuery allows to override CPU leak query (Prometheus)
Expand All @@ -70,6 +73,22 @@ func WithNodesetName(name string) func(*CLNodesLeakDetector) {
}
}

// WithDumpPyroscopeProfiles returns an option that controls whether Pyroscope
// profiles are dumped for each node at the end of the test.
// Dumped profiles are aggregate (cumulative) profiles covering the whole test duration.
func WithDumpPyroscopeProfiles(dump bool) func(*CLNodesLeakDetector) {
	option := func(detector *CLNodesLeakDetector) {
		detector.dumpPyroscopeProfiles = dump
	}
	return option
}

// WithDumpAdminProfiles returns an option that controls whether admin profiles
// are dumped for each node at the end of the test.
// The dump uses the CL node's debug endpoint to fetch pprof snapshots.
func WithDumpAdminProfiles(dump bool) func(*CLNodesLeakDetector) {
	option := func(detector *CLNodesLeakDetector) {
		detector.dumpAdminProfiles = dump
	}
	return option
}

// sanitizeNodesetName escapes characters that would corrupt fmt.Sprintf format strings
// or invalidate PromQL double-quoted label literals.
func sanitizeNodesetName(name string) string {
Expand Down Expand Up @@ -108,6 +127,15 @@ func NewCLNodesLeakDetector(c *ResourceLeakChecker, opts ...func(*CLNodesLeakDet
cd.MemoryQueryAbsolute = replaceNodeset(cd.MemoryQueryAbsolute)
}

if cd.dumpPyroscopeProfiles == true && cd.dumpAdminProfiles == true {
return nil, fmt.Errorf("both Pyroscope and admin profile dumping enabled, please choose only one. Dumping admin profiles will fail if Pyroscope is enabled.")
}

if cd.dumpAdminProfiles == false && cd.dumpPyroscopeProfiles == false {
// default to dumping admin profiles since that's what engineers prefer
cd.dumpAdminProfiles = true
}

return cd, nil
}

Expand Down Expand Up @@ -243,15 +271,42 @@ func (cd *CLNodesLeakDetector) Check(t *CLNodesCheck) error {
Str("TestDuration", t.End.Sub(t.Start).String()).
Float64("TestDurationSec", t.End.Sub(t.Start).Seconds()).
Msg("Leaks info")
framework.L.Info().Msg("Downloading pprof profile..")
dumper := NewProfileDumper(framework.LocalPyroscopeBaseURL)
profilePath, err := dumper.MemoryProfile(&ProfileDumperConfig{
ServiceName: "chainlink-node",
})
if err != nil {
errs = append(errs, fmt.Errorf("failed to download Pyroscopt profile: %w", err))
return errors.Join(errs...)

if cd.dumpPyroscopeProfiles {
profilesToDump := []string{DefaultProfileType, "memory:inuse_space:bytes:space:bytes"}
framework.L.Info().Msgf("Downloading %d pprof profiles..", len(profilesToDump))
dumper := NewProfileDumper(framework.LocalPyroscopeBaseURL)

for _, profileType := range profilesToDump {
profileSplit := strings.Split(profileType, ":")
outputPath := DefaultOutputPath
if len(profileSplit) > 1 {
// e.g. for "memory:inuse_space:bytes:space:bytes" we want to have output file "memory-inuse_space.pprof"
outputPath = fmt.Sprintf("%s-%s.pprof", profileSplit[0], profileSplit[1])
}
profilePath, err := dumper.MemoryProfile(&ProfileDumperConfig{
ServiceName: "chainlink-node",
ProfileType: profileType,
OutputPath: outputPath,
})
if err != nil {
errs = append(errs, fmt.Errorf("failed to download Pyroscope profile %s: %w", profileType, err))
return errors.Join(errs...)
}
framework.L.Info().Str("Path", profilePath).Str("ProfileType", profileType).Msg("Saved pprof profile")
}
}

if cd.dumpAdminProfiles {
framework.L.Info().Msg("Dumping admin profiles..")
ctx, cancel := context.WithTimeout(context.Background(), DefaultNodeProfileDumpTimeout)
defer cancel()
if err := DumpNodeProfiles(ctx, cd.nodesetName+"-node", DefaultAdminProfilesDir); err != nil {
framework.L.Error().Err(err).Msg("Failed to dump node profiles")
errs = append(errs, fmt.Errorf("failed to dump node profiles: %w", err))
}
framework.L.Info().Str("Path", DefaultAdminProfilesDir).Msg("Admin profiles dumped successfully")
}
framework.L.Info().Str("Path", profilePath).Msg("Saved pprof profile")

return errors.Join(errs...)
}
Loading
Loading