From a5e3a2db353b33d49153dce19a632634a589fbb3 Mon Sep 17 00:00:00 2001 From: Daniel McIlvaney Date: Mon, 27 Apr 2026 13:43:35 -0700 Subject: [PATCH 1/3] fix(render): Scrub submodules from distgits --- .../app/azldev/core/sources/sourceprep.go | 63 ++++++- .../core/sources/sourceprep_internal_test.go | 167 ++++++++++++++++++ 2 files changed, 224 insertions(+), 6 deletions(-) create mode 100644 internal/app/azldev/core/sources/sourceprep_internal_test.go diff --git a/internal/app/azldev/core/sources/sourceprep.go b/internal/app/azldev/core/sources/sourceprep.go index 71ef5681..438af5a6 100644 --- a/internal/app/azldev/core/sources/sourceprep.go +++ b/internal/app/azldev/core/sources/sourceprep.go @@ -8,7 +8,6 @@ import ( "errors" "fmt" "log/slog" - "os" "path/filepath" "slices" "strings" @@ -16,6 +15,7 @@ import ( "unicode" gogit "github.com/go-git/go-git/v5" + "github.com/go-git/go-git/v5/plumbing/filemode" "github.com/go-git/go-git/v5/plumbing/object" "github.com/microsoft/azure-linux-dev-tools/internal/app/azldev/core/components" "github.com/microsoft/azure-linux-dev-tools/internal/global/opctx" @@ -396,13 +396,12 @@ func (p *sourcePreparerImpl) trySyntheticHistory( gitDirPath := filepath.Join(sourcesDirPath, ".git") - _, statErr := os.Stat(gitDirPath) - - if statErr != nil && !os.IsNotExist(statErr) { - return fmt.Errorf("failed to check for .git directory at %#q:\n%w", gitDirPath, statErr) + gitDirExists, err := fileutils.Exists(p.fs, gitDirPath) + if err != nil { + return fmt.Errorf("failed to check for .git directory at %#q:\n%w", gitDirPath, err) } - if os.IsNotExist(statErr) { + if !gitDirExists { slog.Info("No .git directory in sources; initializing repository", "component", componentName) @@ -417,6 +416,12 @@ func (p *sourcePreparerImpl) trySyntheticHistory( return fmt.Errorf("failed to open sources repository at %#q:\n%w", sourcesDirPath, err) } + // Strip bogus submodule entries from the index before staging. Some upstream + // repos have gitlink entries without .gitmodules that break go-git's staging. + if err := removeSubmoduleEntries(p.fs, sourcesRepo, sourcesDirPath); err != nil { + return fmt.Errorf("failed to remove submodule entries:\n%w", err) + } + if err := CommitInterleavedHistory(sourcesRepo, changes, importCommit); err != nil { return fmt.Errorf("failed to commit synthetic history:\n%w", err) } @@ -424,6 +429,52 @@ func (p *sourcePreparerImpl) trySyntheticHistory( return nil } +// removeSubmoduleEntries strips gitlink (mode 160000) entries from the repository +// index and removes the corresponding empty directories from disk. Some upstream +// dist-git repositories (e.g., Fedora's "at" package) contain bogus submodule +// references — gitlink entries without a .gitmodules file — that leave empty +// directories after cloning. go-git's [gogit.Worktree.AddWithOptions] fails when +// it encounters these because it tries to read the directory path as a file. +func removeSubmoduleEntries(fs opctx.FS, repo *gogit.Repository, repoDir string) error { + idx, err := repo.Storer.Index() + if err != nil { + return fmt.Errorf("failed to read git index:\n%w", err) + } + + originalLen := len(idx.Entries) + kept := 0 + + for _, entry := range idx.Entries { + if entry.Mode == filemode.Submodule { + slog.Debug("Removing bogus submodule entry from index", + "path", entry.Name) + + // Remove the empty directory left by the uninitialized submodule. + _ = fs.Remove(filepath.Join(repoDir, entry.Name)) + + continue + } + + idx.Entries[kept] = entry + kept++ + } + + if kept == originalLen { + return nil + } + + idx.Entries = idx.Entries[:kept] + + if err := repo.Storer.SetIndex(idx); err != nil { + return fmt.Errorf("failed to update git index:\n%w", err) + } + + slog.Info("Removed submodule entries from git index", + "count", originalLen-kept) + + return nil +} + // DiffSources implements the [SourcePreparer] interface. // It fetches the component's sources once, copies them to a second directory, applies overlays // to the copy, then diffs the two trees. This avoids fetching the sources twice. diff --git a/internal/app/azldev/core/sources/sourceprep_internal_test.go b/internal/app/azldev/core/sources/sourceprep_internal_test.go new file mode 100644 index 00000000..598fc79b --- /dev/null +++ b/internal/app/azldev/core/sources/sourceprep_internal_test.go @@ -0,0 +1,167 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +package sources + +import ( + "path/filepath" + "testing" + + gogit "github.com/go-git/go-git/v5" + "github.com/go-git/go-git/v5/plumbing" + "github.com/go-git/go-git/v5/plumbing/filemode" + "github.com/go-git/go-git/v5/plumbing/format/index" + "github.com/go-git/go-git/v5/storage/memory" + "github.com/microsoft/azure-linux-dev-tools/internal/utils/fileutils" + "github.com/spf13/afero" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestRemoveSubmoduleEntries_StripsGitlinks(t *testing.T) { + const repoDir = "/fakerepo" + + memFS := afero.NewMemMapFs() + storer := memory.NewStorage() + + // Initialize a repo with in-memory storage and a real working tree path. + repo, err := gogit.Init(storer, nil) + require.NoError(t, err) + + // Create an initial commit so HEAD exists. + _, err = repo.CommitObject(plumbing.ZeroHash) + require.Error(t, err) // expected — no commits yet + + // Manually build an index with a normal file entry and a submodule entry. + idx := &index.Index{ + Version: 2, + Entries: []*index.Entry{ + { + Name: "regular-file.spec", + Mode: filemode.Regular, + Hash: plumbing.NewHash("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"), + }, + { + Name: "tests/at", + Mode: filemode.Submodule, + Hash: plumbing.NewHash("bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"), + }, + }, + } + + require.NoError(t, storer.SetIndex(idx)) + + // Create the empty directory that the bogus submodule leaves behind. + submoduleDir := filepath.Join(repoDir, "tests/at") + require.NoError(t, memFS.MkdirAll(submoduleDir, 0o755)) + + // Verify the directory exists before calling removeSubmoduleEntries. + dirExists, err := fileutils.Exists(memFS, submoduleDir) + require.NoError(t, err) + require.True(t, dirExists, "submodule directory should exist before removal") + + // Act + err = removeSubmoduleEntries(memFS, repo, repoDir) + require.NoError(t, err) + + // Assert: index should only have the regular file. + updatedIdx, err := storer.Index() + require.NoError(t, err) + require.Len(t, updatedIdx.Entries, 1) + assert.Equal(t, "regular-file.spec", updatedIdx.Entries[0].Name) + assert.Equal(t, filemode.Regular, updatedIdx.Entries[0].Mode) + + // Assert: empty directory was removed. + dirExists, err = fileutils.Exists(memFS, submoduleDir) + require.NoError(t, err) + assert.False(t, dirExists, "submodule directory should be removed") +} + +func TestRemoveSubmoduleEntries_NoOpWithoutSubmodules(t *testing.T) { + const repoDir = "/fakerepo" + + memFS := afero.NewMemMapFs() + storer := memory.NewStorage() + + repo, err := gogit.Init(storer, nil) + require.NoError(t, err) + + // Index with only normal entries. + idx := &index.Index{ + Version: 2, + Entries: []*index.Entry{ + { + Name: "file-a.spec", + Mode: filemode.Regular, + Hash: plumbing.NewHash("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"), + }, + { + Name: "file-b.patch", + Mode: filemode.Regular, + Hash: plumbing.NewHash("cccccccccccccccccccccccccccccccccccccccc"), + }, + }, + } + + require.NoError(t, storer.SetIndex(idx)) + + err = removeSubmoduleEntries(memFS, repo, repoDir) + require.NoError(t, err) + + // Index should be untouched. + updatedIdx, err := storer.Index() + require.NoError(t, err) + require.Len(t, updatedIdx.Entries, 2) +} + +func TestRemoveSubmoduleEntries_PreservesNormalEntriesWithMixedModes(t *testing.T) { + const repoDir = "/fakerepo" + + memFS := afero.NewMemMapFs() + storer := memory.NewStorage() + + repo, err := gogit.Init(storer, nil) + require.NoError(t, err) + + // Mix of regular files, executable, and submodule entries. + idx := &index.Index{ + Version: 2, + Entries: []*index.Entry{ + { + Name: "build.sh", + Mode: filemode.Executable, + Hash: plumbing.NewHash("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"), + }, + { + Name: "tests/submod1", + Mode: filemode.Submodule, + Hash: plumbing.NewHash("bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"), + }, + { + Name: "pkg.spec", + Mode: filemode.Regular, + Hash: plumbing.NewHash("cccccccccccccccccccccccccccccccccccccccc"), + }, + { + Name: "tests/submod2", + Mode: filemode.Submodule, + Hash: plumbing.NewHash("dddddddddddddddddddddddddddddddddddddd"), + }, + }, + } + + require.NoError(t, storer.SetIndex(idx)) + + // Create empty dirs for both submodules. + require.NoError(t, memFS.MkdirAll(filepath.Join(repoDir, "tests/submod1"), 0o755)) + require.NoError(t, memFS.MkdirAll(filepath.Join(repoDir, "tests/submod2"), 0o755)) + + err = removeSubmoduleEntries(memFS, repo, repoDir) + require.NoError(t, err) + + updatedIdx, err := storer.Index() + require.NoError(t, err) + require.Len(t, updatedIdx.Entries, 2) + assert.Equal(t, "build.sh", updatedIdx.Entries[0].Name) + assert.Equal(t, "pkg.spec", updatedIdx.Entries[1].Name) +} From 274043ad9186e6725cd42bdc143d9149c9c494ee Mon Sep 17 00:00:00 2001 From: Daniel McIlvaney Date: Thu, 30 Apr 2026 14:08:37 -0700 Subject: [PATCH 2/3] address feedback Co-authored-by: Copilot --- internal/app/azldev/core/sources/sourceprep.go | 7 +++++-- .../app/azldev/core/sources/sourceprep_internal_test.go | 9 +++------ 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/internal/app/azldev/core/sources/sourceprep.go b/internal/app/azldev/core/sources/sourceprep.go index 438af5a6..441de999 100644 --- a/internal/app/azldev/core/sources/sourceprep.go +++ b/internal/app/azldev/core/sources/sourceprep.go @@ -449,8 +449,11 @@ func removeSubmoduleEntries(fs opctx.FS, repo *gogit.Repository, repoDir string) slog.Debug("Removing bogus submodule entry from index", "path", entry.Name) - // Remove the empty directory left by the uninitialized submodule. - _ = fs.Remove(filepath.Join(repoDir, entry.Name)) + // Remove the directory left by the uninitialized submodule. + if removeErr := fs.RemoveAll(filepath.Join(repoDir, entry.Name)); removeErr != nil { + slog.Warn("Failed to remove submodule directory", + "path", entry.Name, "error", removeErr) + } continue } diff --git a/internal/app/azldev/core/sources/sourceprep_internal_test.go b/internal/app/azldev/core/sources/sourceprep_internal_test.go index 598fc79b..83c667fb 100644 --- a/internal/app/azldev/core/sources/sourceprep_internal_test.go +++ b/internal/app/azldev/core/sources/sourceprep_internal_test.go @@ -24,14 +24,11 @@ func TestRemoveSubmoduleEntries_StripsGitlinks(t *testing.T) { memFS := afero.NewMemMapFs() storer := memory.NewStorage() - // Initialize a repo with in-memory storage and a real working tree path. + // Initialize a repo with in-memory storage only; this test exercises the + // index/storer and uses memFS separately for directory cleanup assertions. repo, err := gogit.Init(storer, nil) require.NoError(t, err) - // Create an initial commit so HEAD exists. - _, err = repo.CommitObject(plumbing.ZeroHash) - require.Error(t, err) // expected — no commits yet - // Manually build an index with a normal file entry and a submodule entry. idx := &index.Index{ Version: 2, @@ -145,7 +142,7 @@ func TestRemoveSubmoduleEntries_PreservesNormalEntriesWithMixedModes(t *testing. { Name: "tests/submod2", Mode: filemode.Submodule, - Hash: plumbing.NewHash("dddddddddddddddddddddddddddddddddddddd"), + Hash: plumbing.NewHash("dddddddddddddddddddddddddddddddddddddddd"), }, }, } From d20051d82d8356c74139a9c920fe20ba61b3a782 Mon Sep 17 00:00:00 2001 From: Daniel McIlvaney Date: Thu, 30 Apr 2026 15:24:32 -0700 Subject: [PATCH 3/3] address feedback --- internal/app/azldev/core/sources/sourceprep.go | 2 +- .../app/azldev/core/sources/sourceprep_internal_test.go | 7 ++++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/internal/app/azldev/core/sources/sourceprep.go b/internal/app/azldev/core/sources/sourceprep.go index 441de999..4a9ac83d 100644 --- a/internal/app/azldev/core/sources/sourceprep.go +++ b/internal/app/azldev/core/sources/sourceprep.go @@ -446,7 +446,7 @@ func removeSubmoduleEntries(fs opctx.FS, repo *gogit.Repository, repoDir string) for _, entry := range idx.Entries { if entry.Mode == filemode.Submodule { - slog.Debug("Removing bogus submodule entry from index", + slog.Info("Removing bogus submodule entry from index", "path", entry.Name) // Remove the directory left by the uninitialized submodule. diff --git a/internal/app/azldev/core/sources/sourceprep_internal_test.go b/internal/app/azldev/core/sources/sourceprep_internal_test.go index 83c667fb..491c3a49 100644 --- a/internal/app/azldev/core/sources/sourceprep_internal_test.go +++ b/internal/app/azldev/core/sources/sourceprep_internal_test.go @@ -12,6 +12,7 @@ import ( "github.com/go-git/go-git/v5/plumbing/filemode" "github.com/go-git/go-git/v5/plumbing/format/index" "github.com/go-git/go-git/v5/storage/memory" + "github.com/microsoft/azure-linux-dev-tools/internal/utils/fileperms" "github.com/microsoft/azure-linux-dev-tools/internal/utils/fileutils" "github.com/spf13/afero" "github.com/stretchr/testify/assert" @@ -50,7 +51,7 @@ func TestRemoveSubmoduleEntries_StripsGitlinks(t *testing.T) { // Create the empty directory that the bogus submodule leaves behind. submoduleDir := filepath.Join(repoDir, "tests/at") - require.NoError(t, memFS.MkdirAll(submoduleDir, 0o755)) + require.NoError(t, memFS.MkdirAll(submoduleDir, fileperms.PublicDir)) // Verify the directory exists before calling removeSubmoduleEntries. dirExists, err := fileutils.Exists(memFS, submoduleDir) @@ -150,8 +151,8 @@ func TestRemoveSubmoduleEntries_PreservesNormalEntriesWithMixedModes(t *testing. require.NoError(t, storer.SetIndex(idx)) // Create empty dirs for both submodules. - require.NoError(t, memFS.MkdirAll(filepath.Join(repoDir, "tests/submod1"), 0o755)) - require.NoError(t, memFS.MkdirAll(filepath.Join(repoDir, "tests/submod2"), 0o755)) + require.NoError(t, memFS.MkdirAll(filepath.Join(repoDir, "tests/submod1"), fileperms.PublicDir)) + require.NoError(t, memFS.MkdirAll(filepath.Join(repoDir, "tests/submod2"), fileperms.PublicDir)) err = removeSubmoduleEntries(memFS, repo, repoDir) require.NoError(t, err)